mirror of https://github.com/carlosedp/cluster-monitoring.git
synced 2025-10-26 10:23:04 +01:00

Remove old manifests. They are still available on legacy tag

This commit is contained in:
parent 4ee0c86a03
commit de1c46dd63
@@ -1,6 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
  name: alertmanager-main
data:
  alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0Kcm91dGU6CiAgZ3JvdXBfYnk6IFsnam9iJ10KICBncm91cF93YWl0OiAzMHMKICBncm91cF9pbnRlcnZhbDogNW0KICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJlY2VpdmVyOiAnbnVsbCcKICByb3V0ZXM6CiAgLSBtYXRjaDoKICAgICAgYWxlcnRuYW1lOiBEZWFkTWFuc1N3aXRjaAogICAgcmVjZWl2ZXI6ICdudWxsJwpyZWNlaXZlcnM6Ci0gbmFtZTogJ251bGwnCg==
@@ -1,15 +0,0 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: alertmanager
  namespace: monitoring
spec:
  rules:
  - host: alertmanager.internal.carlosedp.com
    http:
      paths:
      - path: /
        backend:
          serviceName: alertmanager-main
          servicePort: web

@@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    alertmanager: main
  name: alertmanager-main
spec:
  type: NodePort
  ports:
  - name: web
    nodePort: 30903
    port: 9093
    protocol: TCP
    targetPort: web
  selector:
    alertmanager: main
@@ -1,11 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
  name: main
  labels:
    alertmanager: main
spec:
  replicas: 1
  baseImage: carlosedp/alertmanager
  version: v0.14.0

@@ -1,22 +0,0 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: arm-exporter
  namespace: monitoring
  labels:
    k8s-app: arm-exporter
spec:
  template:
    metadata:
      name: arm-exporter
      labels:
        k8s-app: arm-exporter
    spec:
      hostNetwork: true
      containers:
      - image: carlosedp/arm_exporter
        name: arm-exporter
        ports:
        - name: http
          containerPort: 9243
          hostPort: 9243
@@ -1,18 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  labels:
    app: arm-exporter
    k8s-app: arm-exporter
  name: arm-exporter
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: http
    port: 9243
    protocol: TCP
  selector:
    k8s-app: arm-exporter

@@ -1,12 +0,0 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: grafana-claim
  namespace: monitoring
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 5Gi

@@ -1,32 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-config
  namespace: monitoring
data:
  config.ini: |
    [database]
    path = /data/grafana.db

    [paths]
    data = /data
    logs = /data/log
    plugins = /data/plugins

    [session]
    provider = memory

    [auth.basic]
    enabled = false

    [auth.anonymous]
    enabled = false

    [smtp]
    enabled = true
    host = smtp-server.monitoring.svc:25
    user =
    password =
    from_address = 'carlosedp@gmail.com'
    from_name = Grafana Alert
    skip_verify = true
@@ -1,7 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
  name: grafana-credentials
data:
  user: YWRtaW4=
  password: YWRtaW4=
File diff suppressed because it is too large
@@ -1,12 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboards
data:
  dashboards.yaml: |+
    - name: '0'
      org_id: 1
      folder: ''
      type: file
      options:
        folder: /grafana-dashboard-definitions/0
@@ -1,15 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasources
data:
  prometheus.yaml: |+
    datasources:
     - name: prometheus
       type: prometheus
       access: proxy
       org_id: 1
       url: http://prometheus-k8s.monitoring.svc:9090
       version: 1
       editable: false

@@ -1,60 +0,0 @@
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  name: grafana
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: grafana
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
      containers:
      - name: grafana
        image: carlosedp/monitoring-grafana:v5.1.3
        volumeMounts:
        - name: grafana-config
          mountPath: /grafana/conf/config.ini
          subPath: config.ini
        - name: grafana-storage
          mountPath: /data
        - name: grafana-datasources
          mountPath: /grafana/conf/provisioning/datasources
        - name: grafana-dashboards
          mountPath: /grafana/conf/provisioning/dashboards
        - name: grafana-dashboard-definitions-0
          mountPath: /grafana-dashboard-definitions/0
        ports:
        - name: web
          containerPort: 3000
        env:
        - name: GF_INSTALL_PLUGINS
          value: "grafana-clock-panel,grafana-piechart-panel"
        - name: GF_PATHS_PLUGINS
          value: "/data/plugins"
        resources:
          requests:
            memory: 100Mi
            cpu: 100m
          limits:
            memory: 200Mi
            cpu: 200m
      volumes:
      - name: grafana-config
        configMap:
          name: grafana-config
      - name: grafana-storage
        persistentVolumeClaim:
          claimName: grafana-claim
      - name: grafana-datasources
        configMap:
          name: grafana-datasources
      - name: grafana-dashboards
        configMap:
          name: grafana-dashboards
      - name: grafana-dashboard-definitions-0
        configMap:
          name: grafana-dashboard-definitions-0
@@ -1,20 +0,0 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: grafana-external
  namespace: monitoring
  labels:
    traffic-type: external
  annotations:
    traefik.frontend.rule.type: PathPrefix
    #traefik.frontend.redirect.regex: ^http://(.*)
    #traefik.frontend.redirect.replacement: https://$1
spec:
  rules:
  - host: grafana.cloud.carlosedp.com
    http:
      paths:
      - path: /
        backend:
          serviceName: grafana
          servicePort: 3000
@@ -1,15 +0,0 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: grafana
  namespace: monitoring
spec:
  rules:
  - host: grafana.internal.carlosedp.com
    http:
      paths:
      - path: /
        backend:
          serviceName: grafana
          servicePort: 3000

@@ -1,15 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: grafana
  labels:
    app: grafana
spec:
  type: NodePort
  ports:
  - port: 3000
    protocol: TCP
    nodePort: 30902
    targetPort: web
  selector:
    app: grafana
@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-controller-manager-prometheus-discovery
  labels:
    k8s-app: kube-controller-manager
spec:
  selector:
    component: kube-controller-manager
  type: ClusterIP
  clusterIP: None
  ports:
  - name: http-metrics
    port: 10252
    targetPort: 10252
    protocol: TCP
@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-scheduler-prometheus-discovery
  labels:
    k8s-app: kube-scheduler
spec:
  selector:
    component: kube-scheduler
  type: ClusterIP
  clusterIP: None
  ports:
  - name: http-metrics
    port: 10251
    targetPort: 10251
    protocol: TCP
@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: monitoring
@@ -1,45 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
- apiGroups: [""]
  resources:
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs: ["list", "watch"]
- apiGroups: ["extensions"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  verbs: ["list", "watch"]
- apiGroups: ["apps"]
  resources:
  - statefulsets
  verbs: ["list", "watch"]
- apiGroups: ["batch"]
  resources:
  - cronjobs
  - jobs
  verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
  resources:
  - horizontalpodautoscalers
  verbs: ["list", "watch"]
- apiGroups: ["authentication.k8s.io"]
  resources:
  - tokenreviews
  verbs: ["create"]
- apiGroups: ["authorization.k8s.io"]
  resources:
  - subjectaccessreviews
  verbs: ["create"]
@@ -1,55 +0,0 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: kube-state-metrics
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
      containers:
      - name: kube-state-metrics
        image: carlosedp/kube-state-metrics:v1.2.0
        args:
        - "--host=0.0.0.0"
        - "--port=8443"
        - "--telemetry-host=0.0.0.0"
        - "--telemetry-port=9443"
        ports:
        - name: http-main
          containerPort: 8443
        - name: http-self
          containerPort: 9443
      - name: addon-resizer
        image: carlosedp/addon-resizer:2.1
        resources:
          limits:
            cpu: 100m
            memory: 30Mi
          requests:
            cpu: 100m
            memory: 30Mi
        env:
          - name: MY_POD_NAME
            valueFrom:
              fieldRef:
                fieldPath: metadata.name
          - name: MY_POD_NAMESPACE
            valueFrom:
              fieldRef:
                fieldPath: metadata.namespace
        command:
          - /pod_nanny
          - --container=kube-state-metrics
          - --cpu=100m
          - --extra-cpu=2m
          - --memory=150Mi
          - --extra-memory=30Mi
          #- --threshold=5
          - --deployment=kube-state-metrics
@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics-resizer
subjects:
- kind: ServiceAccount
  name: kube-state-metrics

@@ -1,15 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kube-state-metrics-resizer
rules:
- apiGroups: [""]
  resources:
  - pods
  verbs: ["get"]
- apiGroups: ["extensions"]
  resources:
  - deployments
  resourceNames: ["kube-state-metrics"]
  verbs: ["get", "update"]

@@ -1,4 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
@@ -1,21 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    app: kube-state-metrics
    k8s-app: kube-state-metrics
  name: kube-state-metrics
spec:
  clusterIP: None
  ports:
  - name: http-main
    port: 8443
    targetPort: http-main
    protocol: TCP
  - name: http-self
    port: 9443
    targetPort: http-self
    protocol: TCP
  selector:
    app: kube-state-metrics

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-exporter
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-exporter
subjects:
- kind: ServiceAccount
  name: node-exporter
  namespace: monitoring
@@ -1,13 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-exporter
rules:
- apiGroups: ["authentication.k8s.io"]
  resources:
  - tokenreviews
  verbs: ["create"]
- apiGroups: ["authorization.k8s.io"]
  resources:
  - subjectaccessreviews
  verbs: ["create"]
@@ -1,59 +0,0 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-exporter
spec:
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: node-exporter
      name: node-exporter
    spec:
      serviceAccountName: node-exporter
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
      hostNetwork: true
      hostPID: true
      containers:
      - image: carlosedp/node_exporter:v0.15.2
        args:
        #- "--web.listen-address=0.0.0.1:9100"
        - "--path.procfs=/host/proc"
        - "--path.sysfs=/host/sys"
        - "--collector.filesystem.ignored-mount-points"
        - '^(\/(host|root)\/sys\/kernel\/debug\/).*'
        name: node-exporter
        ports:
        - containerPort: 9100
          hostPort: 9100
          name: http
        resources:
          requests:
            memory: 30Mi
            cpu: 100m
          limits:
            memory: 50Mi
            cpu: 200m
        volumeMounts:
        - name: proc
          readOnly: true
          mountPath: /host/proc
        - name: sys
          readOnly: true
          mountPath: /host/sys
      tolerations:
        - effect: NoSchedule
          operator: Exists
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: sys
        hostPath:
          path: /sys

@@ -1,4 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-exporter
@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    app: node-exporter
    k8s-app: node-exporter
  name: node-exporter
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: http
    port: 9100
    protocol: TCP
  selector:
    app: node-exporter

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-operator
subjects:
- kind: ServiceAccount
  name: prometheus-operator
  namespace: monitoring
@@ -1,54 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus-operator
rules:
- apiGroups:
  - extensions
  resources:
  - thirdpartyresources
  verbs:
  - "*"
- apiGroups:
  - apiextensions.k8s.io
  resources:
  - customresourcedefinitions
  verbs:
  - "*"
- apiGroups:
  - monitoring.coreos.com
  resources:
  - alertmanagers
  - prometheuses
  - prometheuses/finalizers
  - alertmanagers/finalizers
  - servicemonitors
  verbs:
  - "*"
- apiGroups:
  - apps
  resources:
  - statefulsets
  verbs: ["*"]
- apiGroups: [""]
  resources:
  - configmaps
  - secrets
  verbs: ["*"]
- apiGroups: [""]
  resources:
  - pods
  verbs: ["list", "delete"]
- apiGroups: [""]
  resources:
  - services
  - endpoints
  verbs: ["get", "create", "update"]
- apiGroups: [""]
  resources:
  - nodes
  verbs: ["list", "watch"]
- apiGroups: [""]
  resources:
  - namespaces
  verbs: ["list"]
@@ -1,4 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-operator
@@ -1,15 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: prometheus-operator
  labels:
    k8s-app: prometheus-operator
spec:
  type: ClusterIP
  ports:
  - name: http
    port: 8080
    targetPort: http
    protocol: TCP
  selector:
    k8s-app: prometheus-operator
@@ -1,34 +0,0 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  labels:
    k8s-app: prometheus-operator
  name: prometheus-operator
spec:
  replicas: 1
  template:
    metadata:
      labels:
        k8s-app: prometheus-operator
    spec:
      containers:
      - args:
        - --kubelet-service=kube-system/kubelet
        - --prometheus-config-reloader=carlosedp/prometheus-config-reloader:v0.0.2
        - --config-reloader-image=carlosedp/configmap-reload:v0.2.2-arm
        image: carlosedp/prometheus-operator:v0.17.0
        name: prometheus-operator
        ports:
        - containerPort: 8080
          name: http
        resources:
          limits:
            cpu: 200m
            memory: 100Mi
          requests:
            cpu: 100m
            memory: 50Mi
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
      serviceAccountName: prometheus-operator
@@ -1,14 +0,0 @@
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: prometheus
  namespace: monitoring
spec:
  rules:
  - host: prometheus.internal.carlosedp.com
    http:
      paths:
      - path: /
        backend:
          serviceName: prometheus-k8s
          servicePort: web
@@ -1,54 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: prometheus-k8s
  namespace: monitoring
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-k8s
subjects:
- kind: ServiceAccount
  name: prometheus-k8s
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: prometheus-k8s
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-k8s
subjects:
- kind: ServiceAccount
  name: prometheus-k8s
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: prometheus-k8s
  namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-k8s
subjects:
- kind: ServiceAccount
  name: prometheus-k8s
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: prometheus-k8s
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-k8s
subjects:
- kind: ServiceAccount
  name: prometheus-k8s
  namespace: monitoring
@@ -1,55 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: prometheus-k8s
  namespace: monitoring
rules:
- apiGroups: [""]
  resources:
  - nodes
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: prometheus-k8s
  namespace: kube-system
rules:
- apiGroups: [""]
  resources:
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: prometheus-k8s
  namespace: default
rules:
- apiGroups: [""]
  resources:
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: prometheus-k8s
rules:
- apiGroups: [""]
  resources:
  - nodes/metrics
  verbs: ["get"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
@ -1,602 +0,0 @@
 | 
			
		||||
apiVersion: v1
 | 
			
		||||
kind: ConfigMap
 | 
			
		||||
metadata:
 | 
			
		||||
  name: prometheus-k8s-rules
 | 
			
		||||
  labels:
 | 
			
		||||
    role: prometheus-rulefiles
 | 
			
		||||
    prometheus: k8s
 | 
			
		||||
data:
 | 
			
		||||
  alertmanager.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: alertmanager.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - alert: AlertmanagerConfigInconsistent
 | 
			
		||||
        expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service)
 | 
			
		||||
          GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas, "service",
 | 
			
		||||
          "alertmanager-$1", "alertmanager", "(.*)") != 1
 | 
			
		||||
        for: 5m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: The configuration of the instances of the Alertmanager cluster
 | 
			
		||||
            `{{$labels.service}}` are out of sync.
 | 
			
		||||
      - alert: AlertmanagerDownOrMissing
 | 
			
		||||
        expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
 | 
			
		||||
          "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
 | 
			
		||||
        for: 5m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: An unexpected number of Alertmanagers are scraped or Alertmanagers
 | 
			
		||||
            disappeared from discovery.
 | 
			
		||||
      - alert: AlertmanagerFailedReload
 | 
			
		||||
        expr: alertmanager_config_last_reload_successful == 0
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
 | 
			
		||||
            }}/{{ $labels.pod}}.
 | 
			
		||||
  etcd3.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: ./etcd3.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - alert: InsufficientMembers
 | 
			
		||||
        expr: count(up{job="etcd"} == 0) > (count(up{job="etcd"}) / 2 - 1)
 | 
			
		||||
        for: 3m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: If one more etcd member goes down the cluster will be unavailable
 | 
			
		||||
          summary: etcd cluster insufficient members
 | 
			
		||||
      - alert: NoLeader
 | 
			
		||||
        expr: etcd_server_has_leader{job="etcd"} == 0
 | 
			
		||||
        for: 1m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: etcd member {{ $labels.instance }} has no leader
 | 
			
		||||
          summary: etcd member has no leader
 | 
			
		||||
      - alert: HighNumberOfLeaderChanges
 | 
			
		||||
        expr: increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: etcd instance {{ $labels.instance }} has seen {{ $value }} leader
 | 
			
		||||
            changes within the last hour
 | 
			
		||||
          summary: a high number of leader changes within the etcd cluster are happening
 | 
			
		||||
      - alert: HighNumberOfFailedGRPCRequests
 | 
			
		||||
        expr: sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method)
 | 
			
		||||
          / sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method) > 0.01
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed
 | 
			
		||||
            on etcd instance {{ $labels.instance }}'
 | 
			
		||||
          summary: a high number of gRPC requests are failing
 | 
			
		||||
      - alert: HighNumberOfFailedGRPCRequests
 | 
			
		||||
        expr: sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method)
 | 
			
		||||
          / sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method) > 0.05
 | 
			
		||||
        for: 5m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed
 | 
			
		||||
            on etcd instance {{ $labels.instance }}'
 | 
			
		||||
          summary: a high number of gRPC requests are failing
 | 
			
		||||
      - alert: GRPCRequestsSlow
 | 
			
		||||
        expr: histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job="etcd",grpc_type="unary"}[5m])) by (grpc_service, grpc_method, le))
 | 
			
		||||
          > 0.15
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method
 | 
			
		||||
            }} are slow
 | 
			
		||||
          summary: slow gRPC requests
 | 
			
		||||
      - alert: HighNumberOfFailedHTTPRequests
 | 
			
		||||
        expr: sum(rate(etcd_http_failed_total{job="etcd"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job="etcd"}[5m]))
 | 
			
		||||
          BY (method) > 0.01
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
 | 
			
		||||
            instance {{ $labels.instance }}'
 | 
			
		||||
          summary: a high number of HTTP requests are failing
 | 
			
		||||
      - alert: HighNumberOfFailedHTTPRequests
 | 
			
		||||
        expr: sum(rate(etcd_http_failed_total{job="etcd"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job="etcd"}[5m]))
 | 
			
		||||
          BY (method) > 0.05
 | 
			
		||||
        for: 5m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
 | 
			
		||||
            instance {{ $labels.instance }}'
 | 
			
		||||
          summary: a high number of HTTP requests are failing
 | 
			
		||||
      - alert: HTTPRequestsSlow
 | 
			
		||||
        expr: histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
 | 
			
		||||
          > 0.15
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method
 | 
			
		||||
            }} are slow
 | 
			
		||||
          summary: slow HTTP requests
 | 
			
		||||
      - alert: EtcdMemberCommunicationSlow
 | 
			
		||||
        expr: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m]))
 | 
			
		||||
          > 0.15
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: etcd instance {{ $labels.instance }} member communication with
 | 
			
		||||
            {{ $labels.To }} is slow
 | 
			
		||||
          summary: etcd member communication is slow
 | 
			
		||||
      - alert: HighNumberOfFailedProposals
 | 
			
		||||
        expr: increase(etcd_server_proposals_failed_total{job="etcd"}[1h]) > 5
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: etcd instance {{ $labels.instance }} has seen {{ $value }} proposal
 | 
			
		||||
            failures within the last hour
 | 
			
		||||
          summary: a high number of proposals within the etcd cluster are failing
 | 
			
		||||
      - alert: HighFsyncDurations
 | 
			
		||||
        expr: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m]))
 | 
			
		||||
          > 0.5
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: etcd instance {{ $labels.instance }} fync durations are high
 | 
			
		||||
          summary: high fsync durations
 | 
			
		||||
      - alert: HighCommitDurations
 | 
			
		||||
        expr: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m]))
 | 
			
		||||
          > 0.25
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: etcd instance {{ $labels.instance }} commit durations are high
 | 
			
		||||
          summary: high commit durations
 | 
			
		||||
  general.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: general.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - alert: TargetDown
 | 
			
		||||
        expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $value }}% of {{ $labels.job }} targets are down.'
 | 
			
		||||
          summary: Targets are down
 | 
			
		||||
      - alert: DeadMansSwitch
 | 
			
		||||
        expr: vector(1)
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: none
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: This is a DeadMansSwitch meant to ensure that the entire Alerting
 | 
			
		||||
            pipeline is functional.
 | 
			
		||||
          summary: Alerting DeadMansSwitch
 | 
			
		||||
      - record: fd_utilization
 | 
			
		||||
        expr: process_open_fds / process_max_fds
 | 
			
		||||
      - alert: FdExhaustionClose
 | 
			
		||||
        expr: predict_linear(fd_utilization[1h], 3600 * 4) > 1
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
 | 
			
		||||
            will exhaust in file/socket descriptors within the next 4 hours'
 | 
			
		||||
          summary: file descriptors soon exhausted
 | 
			
		||||
      - alert: FdExhaustionClose
 | 
			
		||||
        expr: predict_linear(fd_utilization[10m], 3600) > 1
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
 | 
			
		||||
            will exhaust in file/socket descriptors within the next hour'
 | 
			
		||||
          summary: file descriptors soon exhausted
 | 
			
		||||
  kube-controller-manager.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: kube-controller-manager.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - alert: K8SControllerManagerDown
 | 
			
		||||
        expr: absent(up{job="kube-controller-manager"} == 1)
 | 
			
		||||
        for: 5m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: There is no running K8S controller manager. Deployments and replication
 | 
			
		||||
            controllers are not making progress.
 | 
			
		||||
          runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager
 | 
			
		||||
          summary: Controller manager is down
 | 
			
		||||
  kube-scheduler.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: kube-scheduler.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.99"
 | 
			
		||||
      - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.9"
 | 
			
		||||
      - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.5"
 | 
			
		||||
      - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.99"
 | 
			
		||||
      - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.9"
 | 
			
		||||
      - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.5"
 | 
			
		||||
      - record: cluster:scheduler_binding_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.99"
 | 
			
		||||
      - record: cluster:scheduler_binding_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.9"
 | 
			
		||||
      - record: cluster:scheduler_binding_latency_seconds:quantile
 | 
			
		||||
        expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket)
 | 
			
		||||
          BY (le, cluster)) / 1e+06
 | 
			
		||||
        labels:
 | 
			
		||||
          quantile: "0.5"
 | 
			
		||||
      - alert: K8SSchedulerDown
 | 
			
		||||
        expr: absent(up{job="kube-scheduler"} == 1)
 | 
			
		||||
        for: 5m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: There is no running K8S scheduler. New pods are not being assigned
 | 
			
		||||
            to nodes.
 | 
			
		||||
          runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler
 | 
			
		||||
          summary: Scheduler is down
 | 
			
		||||
  kube-state-metrics.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: kube-state-metrics.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - alert: DeploymentGenerationMismatch
 | 
			
		||||
        expr: kube_deployment_status_observed_generation != kube_deployment_metadata_generation
 | 
			
		||||
        for: 15m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Observed deployment generation does not match expected one for
 | 
			
		||||
            deployment {{$labels.namespaces}}/{{$labels.deployment}}
 | 
			
		||||
          summary: Deployment is outdated
 | 
			
		||||
      - alert: DeploymentReplicasNotUpdated
 | 
			
		||||
        expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas)
 | 
			
		||||
          or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas))
 | 
			
		||||
          unless (kube_deployment_spec_paused == 1)
 | 
			
		||||
        for: 15m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}}
 | 
			
		||||
          summary: Deployment replicas are outdated
 | 
			
		||||
      - alert: DaemonSetRolloutStuck
 | 
			
		||||
        expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled
 | 
			
		||||
          * 100 < 100
 | 
			
		||||
        for: 15m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Only {{$value}}% of desired pods scheduled and ready for daemon
 | 
			
		||||
            set {{$labels.namespaces}}/{{$labels.daemonset}}
 | 
			
		||||
          summary: DaemonSet is missing pods
 | 
			
		||||
      - alert: K8SDaemonSetsNotScheduled
 | 
			
		||||
        expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled
 | 
			
		||||
          > 0
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: A number of daemonsets are not scheduled.
 | 
			
		||||
          summary: Daemonsets are not scheduled correctly
 | 
			
		||||
      - alert: DaemonSetsMissScheduled
 | 
			
		||||
        expr: kube_daemonset_status_number_misscheduled > 0
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: A number of daemonsets are running where they are not supposed
 | 
			
		||||
            to run.
 | 
			
		||||
          summary: Daemonsets are not scheduled correctly
 | 
			
		||||
      - alert: PodFrequentlyRestarting
 | 
			
		||||
        expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}}
 | 
			
		||||
            times within the last hour
 | 
			
		||||
          summary: Pod is restarting frequently
 | 
			
		||||
  kubelet.rules.yaml: |+
 | 
			
		||||
    groups:
 | 
			
		||||
    - name: kubelet.rules
 | 
			
		||||
      rules:
 | 
			
		||||
      - alert: K8SNodeNotReady
 | 
			
		||||
        expr: kube_node_status_condition{condition="Ready",status="true"} == 0
 | 
			
		||||
        for: 1h
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: The Kubelet on {{ $labels.node }} has not checked in with the API,
 | 
			
		||||
            or has set itself to NotReady, for more than an hour
 | 
			
		||||
          summary: Node status is NotReady
 | 
			
		||||
      - alert: K8SManyNodesNotReady
 | 
			
		||||
        expr: count(kube_node_status_condition{condition="Ready",status="true"} == 0)
 | 
			
		||||
          > 1 and (count(kube_node_status_condition{condition="Ready",status="true"} ==
 | 
			
		||||
          0) / count(kube_node_status_condition{condition="Ready",status="true"})) > 0.2
 | 
			
		||||
        for: 1m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: '{{ $value }}% of Kubernetes nodes are not ready'
 | 
			
		||||
      - alert: K8SKubeletDown
 | 
			
		||||
        expr: count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 3
 | 
			
		||||
        for: 1h
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Prometheus failed to scrape {{ $value }}% of kubelets.
 | 
			
		||||
      - alert: K8SKubeletDown
 | 
			
		||||
        expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
 | 
			
		||||
          * 100 > 1
 | 
			
		||||
        for: 1h
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: critical
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets
 | 
			
		||||
            have disappeared from service discovery.
 | 
			
		||||
          summary: Many Kubelets cannot be scraped
 | 
			
		||||
      - alert: K8SKubeletTooManyPods
 | 
			
		||||
        expr: kubelet_running_pod_count > 100
 | 
			
		||||
        for: 10m
 | 
			
		||||
        labels:
 | 
			
		||||
          severity: warning
 | 
			
		||||
        annotations:
 | 
			
		||||
          description: Kubelet {{$labels.instance}} is running {{$value}} pods, close
 | 
			
		||||
            to the limit of 110
 | 
			
		||||
          summary: Kubelet is close to pod limit
 | 
			
		||||
  kubernetes.rules.yaml: |+
    groups:
    - name: kubernetes.rules
      rules:
      - record: pod_name:container_memory_usage_bytes:sum
        expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY
          (pod_name)
      - record: pod_name:container_spec_cpu_shares:sum
        expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) BY (pod_name)
      - record: pod_name:container_cpu_usage:sum
        expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
          BY (pod_name)
      - record: pod_name:container_fs_usage_bytes:sum
        expr: sum(container_fs_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name)
      - record: namespace:container_memory_usage_bytes:sum
        expr: sum(container_memory_usage_bytes{container_name!=""}) BY (namespace)
      - record: namespace:container_spec_cpu_shares:sum
        expr: sum(container_spec_cpu_shares{container_name!=""}) BY (namespace)
      - record: namespace:container_cpu_usage:sum
        expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m]))
          BY (namespace)
      - record: cluster:memory_usage:ratio
        expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY
          (cluster) / sum(machine_memory_bytes) BY (cluster)
      - record: cluster:container_spec_cpu_shares:ratio
        expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) / 1000
          / sum(machine_cpu_cores)
      - record: cluster:container_cpu_usage:ratio
        expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
          / sum(machine_cpu_cores)
      - record: apiserver_latency_seconds:quantile
        expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) /
          1e+06
        labels:
          quantile: "0.99"
      - record: apiserver_latency_seconds:quantile
        expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) /
          1e+06
        labels:
          quantile: "0.9"
      - record: apiserver_latency_seconds:quantile
        expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) /
          1e+06
        labels:
          quantile: "0.5"
      - alert: APIServerLatencyHigh
        expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
          > 1
        for: 10m
        labels:
          severity: warning
        annotations:
          description: the API server has a 99th percentile latency of {{ $value }} seconds
            for {{$labels.verb}} {{$labels.resource}}
      - alert: APIServerLatencyHigh
        expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
          > 4
        for: 10m
        labels:
          severity: critical
        annotations:
          description: the API server has a 99th percentile latency of {{ $value }} seconds
            for {{$labels.verb}} {{$labels.resource}}
      - alert: APIServerErrorsHigh
        expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
          * 100 > 2
        for: 10m
        labels:
          severity: warning
        annotations:
          description: API server returns errors for {{ $value }}% of requests
      - alert: APIServerErrorsHigh
        expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
          * 100 > 5
        for: 10m
        labels:
          severity: critical
        annotations:
          description: API server returns errors for {{ $value }}% of requests
      - alert: K8SApiserverDown
        expr: absent(up{job="apiserver"} == 1)
        for: 20m
        labels:
          severity: critical
        annotations:
          description: No API servers are reachable or all have disappeared from service
            discovery

      - alert: K8sCertificateExpirationNotice
        labels:
          severity: warning
        annotations:
          description: Kubernetes API Certificate is expiring soon (less than 7 days)
        expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0

      - alert: K8sCertificateExpirationNotice
        labels:
          severity: critical
        annotations:
          description: Kubernetes API Certificate is expiring in less than 1 day
        expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0
  node.rules.yaml: |+
    groups:
    - name: node.rules
      rules:
      - record: instance:node_cpu:rate:sum
        expr: sum(rate(node_cpu{mode!="idle",mode!="iowait",mode!~"^(?:guest.*)$"}[3m]))
          BY (instance)
      - record: instance:node_filesystem_usage:sum
        expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}))
          BY (instance)
      - record: instance:node_network_receive_bytes:rate:sum
        expr: sum(rate(node_network_receive_bytes[3m])) BY (instance)
      - record: instance:node_network_transmit_bytes:rate:sum
        expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
      - record: instance:node_cpu:ratio
        expr: sum(rate(node_cpu{mode!="idle"}[5m])) WITHOUT (cpu, mode) / ON(instance)
          GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
      - record: cluster:node_cpu:sum_rate5m
        expr: sum(rate(node_cpu{mode!="idle"}[5m]))
      - record: cluster:node_cpu:ratio
        expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu) BY (instance, cpu))
      - alert: NodeExporterDown
        expr: absent(up{job="node-exporter"} == 1)
        for: 10m
        labels:
          severity: warning
        annotations:
          description: Prometheus could not scrape a node-exporter for more than 10m,
            or node-exporters have disappeared from discovery
      - alert: NodeDiskRunningFull
        expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0
        for: 30m
        labels:
          severity: warning
        annotations:
          description: device {{$labels.device}} on node {{$labels.instance}} is running
            full within the next 24 hours (mounted at {{$labels.mountpoint}})
      - alert: NodeDiskRunningFull
        expr: predict_linear(node_filesystem_free[30m], 3600 * 2) < 0
        for: 10m
        labels:
          severity: critical
        annotations:
          description: device {{$labels.device}} on node {{$labels.instance}} is running
            full within the next 2 hours (mounted at {{$labels.mountpoint}})
  prometheus.rules.yaml: |+
    groups:
    - name: prometheus.rules
      rules:
      - alert: PrometheusConfigReloadFailed
        expr: prometheus_config_last_reload_successful == 0
        for: 10m
        labels:
          severity: warning
        annotations:
          description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
      - alert: PrometheusNotificationQueueRunningFull
        expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
        for: 10m
        labels:
          severity: warning
        annotations:
          description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
            $labels.pod}}
      - alert: PrometheusErrorSendingAlerts
        expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
          > 0.01
        for: 10m
        labels:
          severity: warning
        annotations:
          description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
            $labels.pod}} to Alertmanager {{$labels.Alertmanager}}
      - alert: PrometheusErrorSendingAlerts
        expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
          > 0.03
        for: 10m
        labels:
          severity: critical
        annotations:
          description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
            $labels.pod}} to Alertmanager {{$labels.Alertmanager}}
      - alert: PrometheusNotConnectedToAlertmanagers
        expr: prometheus_notifications_alertmanagers_discovered < 1
        for: 10m
        labels:
          severity: warning
        annotations:
          description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
            to any Alertmanagers
      - alert: PrometheusTSDBReloadsFailing
        expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0
        for: 12h
        labels:
          severity: warning
        annotations:
          description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
            reload failures over the last two hours.'
          summary: Prometheus has issues reloading data blocks from disk
      - alert: PrometheusTSDBCompactionsFailing
        expr: increase(prometheus_tsdb_compactions_failed_total[2h]) > 0
        for: 12h
        labels:
          severity: warning
        annotations:
          description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
            compaction failures over the last two hours.'
          summary: Prometheus has issues compacting sample blocks
      - alert: PrometheusTSDBWALCorruptions
        expr: tsdb_wal_corruptions_total > 0
        for: 4h
        labels:
          severity: warning
        annotations:
          description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
            log (WAL).'
          summary: Prometheus write-ahead log is corrupted
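These three rule files use the Prometheus 2.x group format, so they can be validated offline before the ConfigMap is applied. A quick check, assuming promtool (bundled with the Prometheus 2.x release) is on the PATH and the files were saved locally under the same names:

promtool check rules kubernetes.rules.yaml node.rules.yaml prometheus.rules.yaml

Once loaded, a recording rule is queried like any other series, e.g. instance:node_cpu:ratio in the expression browser.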
@ -1,4 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-k8s
@ -1,16 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: alertmanager
  labels:
    k8s-app: alertmanager
spec:
  selector:
    matchLabels:
      alertmanager: main
  namespaceSelector:
    matchNames:
    - monitoring
  endpoints:
  - port: web
    interval: 30s
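A ServiceMonitor selects Services by label and namespace, and the Prometheus Operator turns each match into a scrape target. To confirm a monitor's selector actually matches something, list the Services it would pick up (assuming kubectl access to the cluster):

kubectl get svc -n monitoring -l alertmanager=main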
@ -1,23 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-apiserver
  labels:
    k8s-app: apiserver
spec:
  jobLabel: component
  selector:
    matchLabels:
      component: apiserver
      provider: kubernetes
  namespaceSelector:
    matchNames:
    - default
  endpoints:
  - port: https
    interval: 30s
    scheme: https
    tlsConfig:
      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      serverName: kubernetes
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
@ -1,18 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: arm-exporter
  namespace: monitoring
  labels:
    k8s-app: arm-exporter
spec:
  jobLabel: k8s-app
  selector:
    matchLabels:
      k8s-app: arm-exporter
  namespaceSelector:
    matchNames:
    - monitoring
  endpoints:
  - port: http
    interval: 30s
@ -1,17 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-controller-manager
  labels:
    k8s-app: kube-controller-manager
spec:
  jobLabel: k8s-app
  endpoints:
  - port: http-metrics
    interval: 30s
  selector:
    matchLabels:
      k8s-app: kube-controller-manager
  namespaceSelector:
    matchNames:
    - kube-system
@ -1,17 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-scheduler
  labels:
    k8s-app: kube-scheduler
spec:
  jobLabel: k8s-app
  endpoints:
  - port: http-metrics
    interval: 30s
  selector:
    matchLabels:
      k8s-app: kube-scheduler
  namespaceSelector:
    matchNames:
    - kube-system
@ -1,28 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-state-metrics
  labels:
    k8s-app: kube-state-metrics
spec:
  jobLabel: k8s-app
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
  namespaceSelector:
    matchNames:
    - monitoring
  endpoints:
  - port: http-main
    #scheme: https
    interval: 30s
    honorLabels: true
    #bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    #tlsConfig:
    #  insecureSkipVerify: true
  - port: http-self
  #  scheme: https
    interval: 30s
  #  bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  #  tlsConfig:
  #    insecureSkipVerify: true
@ -1,29 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kubelet
  labels:
    k8s-app: kubelet
spec:
  jobLabel: k8s-app
  endpoints:
  - port: https-metrics
    scheme: https
    interval: 30s
    tlsConfig:
      insecureSkipVerify: true
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  - port: https-metrics
    scheme: https
    path: /metrics/cadvisor
    interval: 30s
    honorLabels: true
    tlsConfig:
      insecureSkipVerify: true
    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
  selector:
    matchLabels:
      k8s-app: kubelet
  namespaceSelector:
    matchNames:
    - kube-system
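Both kubelet endpoints are scraped over HTTPS with the pod's service-account token, the second one at the cAdvisor path. For debugging, the same request can be reproduced by hand; a sketch, where 192.168.1.10 stands in for a node address and $TOKEN holds a valid bearer token (both placeholders):

curl -k -H "Authorization: Bearer $TOKEN" https://192.168.1.10:10250/metrics/cadvisor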
@ -1,21 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: node-exporter
  labels:
    k8s-app: node-exporter
spec:
  jobLabel: k8s-app
  selector:
    matchLabels:
      k8s-app: node-exporter
  namespaceSelector:
    matchNames:
    - monitoring
  endpoints:
  - port: http
    #scheme: http
    interval: 30s
    #bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
    #tlsConfig:
    #  insecureSkipVerify: true
@ -1,12 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-operator
  labels:
    k8s-app: prometheus-operator
spec:
  endpoints:
  - port: http
  selector:
    matchLabels:
      k8s-app: prometheus-operator
@ -1,16 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus
  labels:
    k8s-app: prometheus
spec:
  selector:
    matchLabels:
      prometheus: k8s
  namespaceSelector:
    matchNames:
    - monitoring
  endpoints:
  - port: web
    interval: 30s
@ -1,17 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: traefik-ingress-lb
  labels:
    k8s-app: traefik-ingress-lb
spec:
  jobLabel: k8s-app
  endpoints:
  - port: admin
    interval: 30s
  selector:
    matchLabels:
      k8s-app: traefik-ingress-lb
  namespaceSelector:
    matchNames:
    - kube-system
@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    prometheus: k8s
  name: prometheus-k8s
spec:
  type: NodePort
  ports:
  - name: web
    nodePort: 30900
    port: 9090
    protocol: TCP
    targetPort: web
  selector:
    prometheus: k8s
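With this NodePort Service in place the Prometheus UI answers on port 30900 of every node. A quick liveness probe (192.168.1.10 is a placeholder node address):

curl http://192.168.1.10:30900/-/healthy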
@ -1,39 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: k8s
  labels:
    prometheus: k8s
spec:
  replicas: 1
  baseImage: carlosedp/prometheus
  version: v2.2.1
  retention: "168h"
  serviceAccountName: prometheus-k8s
  serviceMonitorSelector:
    matchExpressions:
    - {key: k8s-app, operator: Exists}
  ruleSelector:
    matchLabels:
      role: prometheus-rulefiles
      prometheus: k8s
  resources:
    requests:
      # 2Gi is default, but won't schedule if you don't have a node with >2Gi
      # memory. Modify based on your target and time-series count for
      # production use. This value is mainly meant for demonstration/testing
      # purposes.
      memory: 512Mi
  alerting:
    alertmanagers:
    - namespace: monitoring
      name: alertmanager-main
      port: web
  storage:
    volumeClaimTemplate:
      spec:
        accessModes:
        - ReadWriteOnce
        resources:
          requests:
            storage: 50Gi
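The matchExpressions selector picks up any ServiceMonitor that carries a k8s-app label, regardless of its value, which is why the monitors above all set one. To preview what the operator would select (assuming kubectl access):

kubectl get servicemonitors --all-namespaces -l k8s-app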
@ -1,10 +0,0 @@
#!/bin/bash

echo "Please enter your Gmail account";
read username;

echo "Please enter your Gmail password";
read -s password;

echo "Creating secret"
kubectl create secret generic smtp-account -n monitoring --from-literal=username="${username}" --from-literal=password="${password}"
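The secret can then be verified without echoing the password back; for example, decode just the username field:

kubectl get secret smtp-account -n monitoring -o jsonpath='{.data.username}' | base64 -d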
@ -1,68 +0,0 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "1"
  generation: 1
  labels:
    run: smtp-server
  name: smtp-server
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      run: smtp-server
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        run: smtp-server
    spec:
      containers:
      - image: carlosedp/docker-smtp
        imagePullPolicy: Always
        name: smtp-server
        ports:
        - containerPort: 25
          protocol: TCP
        env:
        - name: GMAIL_USER
          valueFrom:
            secretKeyRef:
              name: smtp-account
              key: username
        - name: GMAIL_PASSWORD
          valueFrom:
            secretKeyRef:
              name: smtp-account
              key: password
        - name: DISABLE_IPV6
          value: "True"
        - name: RELAY_DOMAINS
          value: ":192.168.0.0/24:10.0.0.0/16"
        resources: {}
      restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
  labels:
    run: smtp-server
  name: smtp-server
  namespace: monitoring
spec:
  ports:
  - nodePort: 30025
    port: 25
    protocol: TCP
    targetPort: 25
  selector:
    run: smtp-server
  type: NodePort
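To confirm the relay answers on its NodePort, connect and watch for the SMTP banner (192.168.1.10 is a placeholder node address):

nc 192.168.1.10 30025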
@ -1,20 +0,0 @@
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  name: snmp-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: snmp-exporter
  template:
    metadata:
      labels:
        app: snmp-exporter
    spec:
      containers:
      - image: carlosedp/snmp_exporter:v0.9.0
        name: snmp-exporter
        ports:
        - containerPort: 9116
          name: metrics
@ -1,15 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  labels:
    app: snmp-exporter
  name: snmp-exporter
spec:
  ports:
  - name: http-metrics
    port: 9116
    protocol: TCP
    targetPort: metrics
  selector:
    app: snmp-exporter
@ -1,24 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: snmp-exporter
  name: snmp-exporter
spec:
  jobLabel: k8s-app
  selector:
    matchLabels:
      app: snmp-exporter
  namespaceSelector:
    matchNames:
    - monitoring

  endpoints:
  - interval: 60s
    port: http-metrics
    params:
      module:
      - ddwrt
      target:
      - 192.168.1.1
    path: "/snmp"
    targetPort: 9116
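The params block makes Prometheus scrape the exporter's /snmp endpoint with the ddwrt module against the router at 192.168.1.1. The same scrape can be reproduced manually from inside the cluster, assuming in-cluster DNS:

curl 'http://snmp-exporter.monitoring.svc:9116/snmp?module=ddwrt&target=192.168.1.1'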