diff --git a/manifests-old/alertmanager/alertmanager-config.yaml b/manifests-old/alertmanager/alertmanager-config.yaml deleted file mode 100644 index 62d3901..0000000 --- a/manifests-old/alertmanager/alertmanager-config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: alertmanager-main -data: - alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0Kcm91dGU6CiAgZ3JvdXBfYnk6IFsnam9iJ10KICBncm91cF93YWl0OiAzMHMKICBncm91cF9pbnRlcnZhbDogNW0KICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJlY2VpdmVyOiAnbnVsbCcKICByb3V0ZXM6CiAgLSBtYXRjaDoKICAgICAgYWxlcnRuYW1lOiBEZWFkTWFuc1N3aXRjaAogICAgcmVjZWl2ZXI6ICdudWxsJwpyZWNlaXZlcnM6Ci0gbmFtZTogJ251bGwnCg== diff --git a/manifests-old/alertmanager/alertmanager-ingress.yaml b/manifests-old/alertmanager/alertmanager-ingress.yaml deleted file mode 100644 index 86c133b..0000000 --- a/manifests-old/alertmanager/alertmanager-ingress.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: alertmanager - namespace: monitoring -spec: - rules: - - host: alertmanager.internal.carlosedp.com - http: - paths: - - path: / - backend: - serviceName: alertmanager-main - servicePort: web - diff --git a/manifests-old/alertmanager/alertmanager-service.yaml b/manifests-old/alertmanager/alertmanager-service.yaml deleted file mode 100644 index a541310..0000000 --- a/manifests-old/alertmanager/alertmanager-service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - alertmanager: main - name: alertmanager-main -spec: - type: NodePort - ports: - - name: web - nodePort: 30903 - port: 9093 - protocol: TCP - targetPort: web - selector: - alertmanager: main diff --git a/manifests-old/alertmanager/alertmanager.yaml b/manifests-old/alertmanager/alertmanager.yaml deleted file mode 100644 index deadd75..0000000 --- a/manifests-old/alertmanager/alertmanager.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Alertmanager -metadata: - name: main - labels: - alertmanager: main -spec: - replicas: 1 - baseImage: carlosedp/alertmanager - version: v0.14.0 - diff --git a/manifests-old/armexporter/daemonset.yaml b/manifests-old/armexporter/daemonset.yaml deleted file mode 100644 index 9d30040..0000000 --- a/manifests-old/armexporter/daemonset.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: DaemonSet -metadata: - name: arm-exporter - namespace: monitoring - labels: - k8s-app: arm-exporter -spec: - template: - metadata: - name: arm-exporter - labels: - k8s-app: arm-exporter - spec: - hostNetwork: true - containers: - - image: carlosedp/arm_exporter - name: arm-exporter - ports: - - name: http - containerPort: 9243 - hostPort: 9243 diff --git a/manifests-old/armexporter/service.yaml b/manifests-old/armexporter/service.yaml deleted file mode 100644 index ca5d4d0..0000000 --- a/manifests-old/armexporter/service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - namespace: monitoring - labels: - app: arm-exporter - k8s-app: arm-exporter - name: arm-exporter -spec: - type: ClusterIP - clusterIP: None - ports: - - name: http - port: 9243 - protocol: TCP - selector: - k8s-app: arm-exporter - diff --git a/manifests-old/grafana/grafana-claim.yaml b/manifests-old/grafana/grafana-claim.yaml deleted file mode 100644 index 186bd5c..0000000 --- a/manifests-old/grafana/grafana-claim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: grafana-claim - namespace: monitoring -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 5Gi - diff --git a/manifests-old/grafana/grafana-configmap.yaml b/manifests-old/grafana/grafana-configmap.yaml deleted file mode 100644 index e15a0d0..0000000 --- a/manifests-old/grafana/grafana-configmap.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: grafana-config - namespace: monitoring -data: - config.ini: | - [database] - path = /data/grafana.db - - [paths] - data = /data - logs = /data/log - plugins = /data/plugins - - [session] - provider = memory - - [auth.basic] - enabled = false - - [auth.anonymous] - enabled = false - - [smtp] - enabled = true - host = smtp-server.monitoring.svc:25 - user = - password = - from_address = 'carlosedp@gmail.com' - from_name = Grafana Alert - skip_verify = true diff --git a/manifests-old/grafana/grafana-credentials.yaml b/manifests-old/grafana/grafana-credentials.yaml deleted file mode 100644 index c3da1b6..0000000 --- a/manifests-old/grafana/grafana-credentials.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: grafana-credentials -data: - user: YWRtaW4= - password: YWRtaW4= diff --git a/manifests-old/grafana/grafana-dashboard-definitions.yaml b/manifests-old/grafana/grafana-dashboard-definitions.yaml deleted file mode 100644 index dbd2a30..0000000 --- a/manifests-old/grafana/grafana-dashboard-definitions.yaml +++ /dev/null @@ -1,7360 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: grafana-dashboard-definitions-0 -data: - deployment-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 1, - "hideControls": false, - "links": [], - "rows": [ - { - "collapse": false, - "editable": false, - "height": "200px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 8, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "cores", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "CPU", - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 9, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "GB", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "80%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Memory", - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "Bps", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, - "id": 7, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Network", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "100px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, - "id": 5, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "metric": "kube_deployment_spec_replicas", - "refId": "A", - "step": 600 - } - ], - "title": "Desired Replicas", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 6, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Available Replicas", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 3, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Observed Generation", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 2, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Metadata Generation", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "350px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "current replicas", - "refId": "A", - "step": 30 - }, - { - "expr": "min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "available", - "refId": "B", - "step": 30 - }, - { - "expr": "max(kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "unavailable", - "refId": "C", - "step": 30 - }, - { - "expr": "min(kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "updated", - "refId": "D", - "step": 30 - }, - { - "expr": "max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "desired", - "refId": "E", - "step": 30 - } - ], - "title": "Replicas", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "", - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 1, - "show": false - } - ] - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "deployment_namespace", - "options": [], - "query": "label_values(kube_deployment_metadata_generation, namespace)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": null, - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Deployment", - "multi": false, - "name": "deployment_name", - "options": [], - "query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "deployment", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Deployment", - "version": 1 - } - etcd-dashboard.json: |+ - { - "__inputs": [ - { - "name": "prometheus", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.5.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [] - }, - "description": "etcd sample Grafana dashboard with Prometheus", - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [], - "refresh": false, - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 28, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(etcd_server_has_leader)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "metric": "etcd_server_has_leader", - "refId": "A", - "step": 20 - } - ], - "thresholds": "", - "title": "Up", - "type": "singlestat", - "valueFontSize": "200%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 0, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 5, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RPC Rate", - "metric": "grpc_server_started_total", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RPC Failed Rate", - "metric": "grpc_server_handled_total", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "RPC Rate", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 0, - "id": 41, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 4, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Watch Streams", - "metric": "grpc_server_handled_total", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Lease Streams", - "metric": "grpc_server_handled_total", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Active Streams", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": null, - "editable": false, - "error": false, - "fill": 0, - "grid": {}, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "etcd_debugging_mvcc_db_total_size_in_bytes", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} DB Size", - "metric": "", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "DB Size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 0, - "grid": {}, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{instance}} WAL fsync", - "metric": "etcd_disk_wal_fsync_duration_seconds_bucket", - "refId": "A", - "step": 4 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} DB fsync", - "metric": "etcd_disk_backend_commit_duration_seconds_bucket", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Sync Duration", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 0, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "process_resident_memory_bytes", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Resident Memory", - "metric": "process_resident_memory_bytes", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 5, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 3, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(etcd_network_client_grpc_received_bytes_total[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Client Traffic In", - "metric": "etcd_network_client_grpc_received_bytes_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Client Traffic In", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 5, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 3, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(etcd_network_client_grpc_sent_bytes_total[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Client Traffic Out", - "metric": "etcd_network_client_grpc_sent_bytes_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Client Traffic Out", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 0, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Peer Traffic In", - "metric": "etcd_network_peer_received_bytes_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Peer Traffic In", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": null, - "editable": false, - "error": false, - "fill": 0, - "grid": {}, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}} Peer Traffic Out", - "metric": "etcd_network_peer_sent_bytes_total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Peer Traffic Out", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 0, - "id": 40, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(etcd_server_proposals_failed_total[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Proposal Failure Rate", - "metric": "etcd_server_proposals_failed_total", - "refId": "A", - "step": 2 - }, - { - "expr": "sum(etcd_server_proposals_pending)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Proposal Pending Total", - "metric": "etcd_server_proposals_pending", - "refId": "B", - "step": 2 - }, - { - "expr": "sum(rate(etcd_server_proposals_committed_total[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Proposal Commit Rate", - "metric": "etcd_server_proposals_committed_total", - "refId": "C", - "step": 2 - }, - { - "expr": "sum(rate(etcd_server_proposals_applied_total[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Proposal Apply Rate", - "refId": "D", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Raft Proposals", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "editable": false, - "error": false, - "fill": 0, - "id": 19, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "changes(etcd_server_leader_changes_seen_total[1d])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} Total Leader Elections Per Day", - "metric": "etcd_server_leader_changes_seen_total", - "refId": "A", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total Leader Elections Per Day", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "New row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": { - "now": true, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "etcd", - "version": 4 - } - kubernetes-capacity-planning-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "gnetId": 22, - "graphTooltip": 0, - "hideControls": false, - "links": [], - "refresh": false, - "rows": [ - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 3, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(node_cpu{mode=\"idle\"}[2m])) * 100", - "hide": false, - "intervalFactor": 10, - "legendFormat": "", - "refId": "A", - "step": 50 - } - ], - "title": "Idle CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "cpu usage", - "logBase": 1, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 9, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_load1)", - "intervalFactor": 4, - "legendFormat": "load 1m", - "refId": "A", - "step": 20, - "target": "" - }, - { - "expr": "sum(node_load5)", - "intervalFactor": 4, - "legendFormat": "load 5m", - "refId": "B", - "step": 20, - "target": "" - }, - { - "expr": "sum(node_load15)", - "intervalFactor": 4, - "legendFormat": "load 15m", - "refId": "C", - "step": 20, - "target": "" - } - ], - "title": "System Load", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 4, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 9, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)", - "intervalFactor": 2, - "legendFormat": "memory usage", - "metric": "memo", - "refId": "A", - "step": 10, - "target": "" - }, - { - "expr": "sum(node_memory_Buffers)", - "interval": "", - "intervalFactor": 2, - "legendFormat": "memory buffers", - "metric": "memo", - "refId": "B", - "step": 10, - "target": "" - }, - { - "expr": "sum(node_memory_Cached)", - "interval": "", - "intervalFactor": 2, - "legendFormat": "memory cached", - "metric": "memo", - "refId": "C", - "step": 10, - "target": "" - }, - { - "expr": "sum(node_memory_MemFree)", - "interval": "", - "intervalFactor": 2, - "legendFormat": "memory free", - "metric": "memo", - "refId": "D", - "step": 10, - "target": "" - } - ], - "title": "Memory Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 5, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100", - "intervalFactor": 2, - "metric": "", - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "80, 90", - "title": "Memory Usage", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "246px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 6, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 - }, - { - "alias": "{instance=\"172.17.0.1:9100\"}", - "yaxis": 2 - }, - { - "alias": "io time", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(node_disk_bytes_read[5m]))", - "hide": false, - "intervalFactor": 4, - "legendFormat": "read", - "refId": "A", - "step": 20, - "target": "" - }, - { - "expr": "sum(rate(node_disk_bytes_written[5m]))", - "intervalFactor": 4, - "legendFormat": "written", - "refId": "B", - "step": 20 - }, - { - "expr": "sum(rate(node_disk_io_time_ms[5m]))", - "intervalFactor": 4, - "legendFormat": "io time", - "refId": "C", - "step": 20 - } - ], - "title": "Disk I/O", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "ms", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percentunit", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 12, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(node_filesystem_size{device!=\"rootfs\"}) - sum(node_filesystem_free{device!=\"rootfs\"})) / sum(node_filesystem_size{device!=\"rootfs\"})", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "0.75, 0.9", - "title": "Disk Space Usage", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 8, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(node_network_receive_bytes{device!~\"lo\"}[5m]))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10, - "target": "" - } - ], - "title": "Network Received", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "bytes", - "logBase": 1, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 10, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(node_network_transmit_bytes{device!~\"lo\"}[5m]))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10, - "target": "" - } - ], - "title": "Network Transmitted", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "bytes", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "276px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 11, - "isNew": true, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 11, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_pod_info)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current number of Pods", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(kube_node_status_capacity_pods)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Maximum capacity of pods", - "refId": "B", - "step": 10 - } - ], - "title": "Cluster Pod Utilization", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 7, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "80, 90", - "title": "Pod Utilization", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes Capacity Planning", - "version": 4 - } - kubernetes-cluster-health-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 0, - "hideControls": false, - "links": [], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "editable": false, - "height": "254px", - "panels": [ - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 1, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(up{job=~\"apiserver|kube-scheduler|kube-controller-manager\"} == 0)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Control Plane Components Down", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "Everything UP and healthy", - "value": "null" - }, - { - "op": "=", - "text": "", - "value": "" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 2, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Alerts Firing", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 3, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(ALERTS{alertstate=\"pending\",alertname!=\"DeadMansSwitch\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "3, 5", - "title": "Alerts Pending", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 4, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "count(increase(kube_pod_container_status_restarts[1h]) > 5)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Crashlooping Pods", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": false, - "title": "Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 5, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(kube_node_status_condition{condition=\"Ready\",status!=\"true\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Node Not Ready", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 6, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(kube_node_status_condition{condition=\"DiskPressure\",status=\"true\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Node Disk Pressure", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 7, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Node Memory Pressure", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 8, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(kube_node_spec_unschedulable)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Nodes Unschedulable", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": false, - "title": "Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes Cluster Health", - "version": 9 - } - kubernetes-cluster-status-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 0, - "hideControls": false, - "links": [], - "rows": [ - { - "collapse": false, - "editable": false, - "height": "129px", - "panels": [ - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 5, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 6, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(up{job=~\"apiserver|kube-scheduler|kube-controller-manager\"} == 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Control Plane UP", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "UP", - "value": "null" - } - ], - "valueName": "total" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 6, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 6, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "3, 5", - "title": "Alerts Firing", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": true, - "title": "Cluster Health", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "168px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 1, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(up{job=\"apiserver\"} == 1) / count(up{job=\"apiserver\"})) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "50, 80", - "title": "API Servers UP", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 2, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(up{job=\"kube-controller-manager\"} == 1) / count(up{job=\"kube-controller-manager\"})) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "50, 80", - "title": "Controller Managers UP", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 3, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(up{job=\"kube-scheduler\"} == 1) / count(up{job=\"kube-scheduler\"})) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "50, 80", - "title": "Schedulers UP", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 4, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "count(increase(kube_pod_container_status_restarts{namespace=~\"kube-system|tectonic-system\"}[1h]) > 5)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "1, 3", - "title": "Crashlooping Control Plane Pods", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": true, - "title": "Control Plane Status", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "158px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 8, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "sum(100 - (avg by (instance) (rate(node_cpu{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)) / count(node_cpu{job=\"node-exporter\",mode=\"idle\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "80, 90", - "title": "CPU Utilization", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 7, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "80, 90", - "title": "Memory Utilization", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 9, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(node_filesystem_size{device!=\"rootfs\"}) - sum(node_filesystem_free{device!=\"rootfs\"})) / sum(node_filesystem_size{device!=\"rootfs\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "80, 90", - "title": "Filesystem Utilization", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 10, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "80, 90", - "title": "Pod Utilization", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": true, - "title": "Capacity Planning", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes Cluster Status", - "version": 3 - } - kubernetes-control-plane-status-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 0, - "hideControls": false, - "links": [], - "rows": [ - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 1, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(up{job=\"apiserver\"} == 1) / sum(up{job=\"apiserver\"})) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "50, 80", - "title": "API Servers UP", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 2, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(up{job=\"kube-controller-manager\"} == 1) / sum(up{job=\"kube-controller-manager\"})) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "50, 80", - "title": "Controller Managers UP", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 3, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(up{job=\"kube-scheduler\"} == 1) / sum(up{job=\"kube-scheduler\"})) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "thresholds": "50, 80", - "title": "Schedulers UP", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 4, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(sum by(instance) (rate(apiserver_request_count{code=~\"5..\"}[5m])) / sum by(instance) (rate(apiserver_request_count[5m]))) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600 - } - ], - "thresholds": "5, 10", - "title": "API Server Request Error Rate", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 7, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 30 - } - ], - "title": "API Server Request Latency", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 5, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "cluster:scheduler_e2e_scheduling_latency_seconds:quantile", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "title": "End to End Scheduling Latency", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "dtdurations", - "logBase": 1, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 6, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(instance) (rate(apiserver_request_count{code!~\"2..\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Error Rate", - "refId": "A", - "step": 60 - }, - { - "expr": "sum by(instance) (rate(apiserver_request_count[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Request Rate", - "refId": "B", - "step": 60 - } - ], - "title": "API Server Request Rates", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes Control Plane Status", - "version": 3 - } - kubernetes-resource-requests-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 0, - "hideControls": false, - "links": [], - "refresh": false, - "rows": [ - { - "collapse": false, - "editable": false, - "height": "300px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "description": "This represents the total [CPU resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu) in the cluster.\nFor comparison the total [allocatable CPU cores](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 1, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "min(sum(kube_node_status_allocatable_cpu_cores) by (instance))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Allocatable CPU Cores", - "refId": "A", - "step": 20 - }, - { - "expr": "max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Requested CPU Cores", - "refId": "B", - "step": 20 - } - ], - "title": "CPU Cores", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "CPU Cores", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 2, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance)) / min(sum(kube_node_status_allocatable_cpu_cores) by (instance)) * 100", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 240 - } - ], - "thresholds": "80, 90", - "title": "CPU Cores", - "transparent": false, - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "CPU Cores", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "300px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "description": "This represents the total [memory resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) in the cluster.\nFor comparison the total [allocatable memory](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 3, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "min(sum(kube_node_status_allocatable_memory_bytes) by (instance))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Allocatable Memory", - "refId": "A", - "step": 20 - }, - { - "expr": "max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance))", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Requested Memory", - "refId": "B", - "step": 20 - } - ], - "title": "Memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "Memory", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 4, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance)) / min(sum(kube_node_status_allocatable_memory_bytes) by (instance)) * 100", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 240 - } - ], - "thresholds": "80, 90", - "title": "Memory", - "transparent": false, - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "Memory", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kubernetes Resource Requests", - "version": 2 - } - nodes-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Dashboard to get an overview of one server", - "editable": false, - "gnetId": 22, - "graphTooltip": 0, - "hideControls": false, - "links": [], - "refresh": false, - "rows": [ - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 3, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=\"$server\"}[5m])) * 100)", - "hide": false, - "intervalFactor": 10, - "legendFormat": "{{cpu}}", - "refId": "A", - "step": 50 - } - ], - "title": "Idle CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "cpu usage", - "logBase": 1, - "max": 100, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 9, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1{instance=\"$server\"}", - "intervalFactor": 4, - "legendFormat": "load 1m", - "refId": "A", - "step": 20, - "target": "" - }, - { - "expr": "node_load5{instance=\"$server\"}", - "intervalFactor": 4, - "legendFormat": "load 5m", - "refId": "B", - "step": 20, - "target": "" - }, - { - "expr": "node_load15{instance=\"$server\"}", - "intervalFactor": 4, - "legendFormat": "load 15m", - "refId": "C", - "step": 20, - "target": "" - } - ], - "title": "System Load", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 4, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 9, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal{instance=\"$server\"} - node_memory_MemFree{instance=\"$server\"} - node_memory_Buffers{instance=\"$server\"} - node_memory_Cached{instance=\"$server\"}", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "memory used", - "metric": "", - "refId": "C", - "step": 10 - }, - { - "expr": "node_memory_Buffers{instance=\"$server\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "memory buffers", - "metric": "", - "refId": "E", - "step": 10 - }, - { - "expr": "node_memory_Cached{instance=\"$server\"}", - "intervalFactor": 2, - "legendFormat": "memory cached", - "metric": "", - "refId": "F", - "step": 10 - }, - { - "expr": "node_memory_MemFree{instance=\"$server\"}", - "intervalFactor": 2, - "legendFormat": "memory free", - "metric": "", - "refId": "D", - "step": 10 - } - ], - "title": "Memory Usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 5, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "((node_memory_MemTotal{instance=\"$server\"} - node_memory_MemFree{instance=\"$server\"} - node_memory_Buffers{instance=\"$server\"} - node_memory_Cached{instance=\"$server\"}) / node_memory_MemTotal{instance=\"$server\"}) * 100", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "80, 90", - "title": "Memory Usage", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 6, - "isNew": true, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 - }, - { - "alias": "{instance=\"172.17.0.1:9100\"}", - "yaxis": 2 - }, - { - "alias": "io time", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance) (rate(node_disk_bytes_read{instance=\"$server\"}[2m]))", - "hide": false, - "intervalFactor": 4, - "legendFormat": "read", - "refId": "A", - "step": 20, - "target": "" - }, - { - "expr": "sum by (instance) (rate(node_disk_bytes_written{instance=\"$server\"}[2m]))", - "intervalFactor": 4, - "legendFormat": "written", - "refId": "B", - "step": 20 - }, - { - "expr": "sum by (instance) (rate(node_disk_io_time_ms{instance=\"$server\"}[2m]))", - "intervalFactor": 4, - "legendFormat": "io time", - "refId": "C", - "step": 20 - } - ], - "title": "Disk I/O", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "ms", - "logBase": 1, - "show": true - } - ] - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": false, - "format": "percentunit", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "hideTimeOverride": false, - "id": 7, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "(sum(node_filesystem_size{device!=\"rootfs\",instance=\"$server\"}) - sum(node_filesystem_free{device!=\"rootfs\",instance=\"$server\"})) / sum(node_filesystem_size{device!=\"rootfs\",instance=\"$server\"})", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "target": "" - } - ], - "thresholds": "0.75, 0.9", - "title": "Disk Space Usage", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 8, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_receive_bytes{instance=\"$server\",device!~\"lo\"}[5m])", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A", - "step": 10, - "target": "" - } - ], - "title": "Network Received", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "bytes", - "logBase": 1, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 10, - "isNew": false, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "transmitted", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_transmit_bytes{instance=\"$server\",device!~\"lo\"}[5m])", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "B", - "step": 10, - "target": "" - } - ], - "title": "Network Transmitted", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "bytes", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "server", - "options": [], - "query": "label_values(node_boot_time, instance)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Nodes", - "version": 2 - } - pods-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 1, - "hideControls": false, - "links": [], - "refresh": false, - "rows": [ - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 1, - "isNew": false, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Current: {{ container_name }}", - "metric": "container_memory_usage_bytes", - "refId": "A", - "step": 15 - }, - { - "expr": "kube_pod_container_resource_requests_memory_bytes{pod=\"$pod\", container=~\"$container\"}", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "metric": "kube_pod_container_resource_requests_memory_bytes", - "refId": "B", - "step": 20 - }, - { - "expr": "kube_pod_container_resource_limits_memory_bytes{pod=\"$pod\", container=~\"$container\"}", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "metric": "kube_pod_container_resource_limits_memory_bytes", - "refId": "C", - "step": 20 - } - ], - "title": "Memory Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 2, - "isNew": false, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", - "intervalFactor": 2, - "legendFormat": "{{ container_name }}", - "refId": "A", - "step": 30 - }, - { - "expr": "kube_pod_container_resource_requests_cpu_cores{pod=\"$pod\", container=~\"$container\"}", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "metric": "kube_pod_container_resource_requests_cpu_cores", - "refId": "B", - "step": 20 - }, - { - "expr": "kube_pod_container_resource_limits_cpu_cores{pod=\"$pod\", container=~\"$container\"}", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "metric": "kube_pod_container_resource_limits_memory_bytes", - "refId": "C", - "step": 20 - } - ], - "title": "CPU Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 3, - "isNew": false, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{pod_name=\"$pod\"}[1m])))", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 30 - } - ], - "title": "Network I/O", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ] - } - ], - "showTitle": false, - "title": "New Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": true, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(kube_pod_info, namespace)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [], - "query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".*", - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [], - "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Pods", - "version": 1 - } - statefulset-dashboard.json: |+ - { - "__inputs": [ - { - "description": "", - "label": "prometheus", - "name": "prometheus", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "editable": false, - "graphTooltip": 1, - "hideControls": false, - "links": [], - "rows": [ - { - "collapse": false, - "editable": false, - "height": "200px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 8, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "cores", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "CPU", - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 9, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "GB", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "80%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(container_memory_usage_bytes{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}) / 1024^3", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Memory", - "type": "singlestat", - "valueFontSize": "110%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "Bps", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, - "id": 7, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 4, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Network", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "100px", - "panels": [ - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": false - }, - "id": 5, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "metric": "kube_statefulset_replicas", - "refId": "A", - "step": 600 - } - ], - "title": "Desired Replicas", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 6, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Available Replicas", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 3, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(kube_statefulset_status_observed_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Observed Generation", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "prometheus", - "editable": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 2, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "targets": [ - { - "expr": "max(kube_statefulset_metadata_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "refId": "A", - "step": 600 - } - ], - "title": "Metadata Generation", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "editable": false, - "height": "350px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": false, - "error": false, - "fill": 1, - "grid": { - "threshold1Color": "rgba(216, 200, 27, 0.27)", - "threshold2Color": "rgba(234, 112, 112, 0.22)" - }, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "available", - "refId": "B", - "step": 30 - }, - { - "expr": "max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)", - "intervalFactor": 2, - "legendFormat": "desired", - "refId": "E", - "step": 30 - } - ], - "title": "Replicas", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "", - "logBase": 1, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 1, - "show": false - } - ] - } - ], - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "sharedCrosshair": false, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": ".*", - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "statefulset_namespace", - "options": [], - "query": "label_values(kube_statefulset_metadata_generation, namespace)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": null, - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "StatefulSet", - "multi": false, - "name": "statefulset_name", - "options": [], - "query": "label_values(kube_statefulset_metadata_generation{namespace=\"$statefulset_namespace\"}, statefulset)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "statefulset", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "StatefulSet", - "version": 1 - } ---- diff --git a/manifests-old/grafana/grafana-dashboards.yaml b/manifests-old/grafana/grafana-dashboards.yaml deleted file mode 100644 index 772d3f6..0000000 --- a/manifests-old/grafana/grafana-dashboards.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: grafana-dashboards -data: - dashboards.yaml: |+ - - name: '0' - org_id: 1 - folder: '' - type: file - options: - folder: /grafana-dashboard-definitions/0 diff --git a/manifests-old/grafana/grafana-datasources.yaml b/manifests-old/grafana/grafana-datasources.yaml deleted file mode 100644 index 33c3081..0000000 --- a/manifests-old/grafana/grafana-datasources.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: grafana-datasources -data: - prometheus.yaml: |+ - datasources: - - name: prometheus - type: prometheus - access: proxy - org_id: 1 - url: http://prometheus-k8s.monitoring.svc:9090 - version: 1 - editable: false - diff --git a/manifests-old/grafana/grafana-deployment.yaml b/manifests-old/grafana/grafana-deployment.yaml deleted file mode 100644 index 0f0642f..0000000 --- a/manifests-old/grafana/grafana-deployment.yaml +++ /dev/null @@ -1,60 +0,0 @@ -apiVersion: apps/v1beta1 -kind: Deployment -metadata: - name: grafana -spec: - replicas: 1 - template: - metadata: - labels: - app: grafana - spec: - securityContext: - runAsNonRoot: true - runAsUser: 65534 - containers: - - name: grafana - image: carlosedp/monitoring-grafana:v5.1.3 - volumeMounts: - - name: grafana-config - mountPath: /grafana/conf/config.ini - subPath: config.ini - - name: grafana-storage - mountPath: /data - - name: grafana-datasources - mountPath: /grafana/conf/provisioning/datasources - - name: grafana-dashboards - mountPath: /grafana/conf/provisioning/dashboards - - name: grafana-dashboard-definitions-0 - mountPath: /grafana-dashboard-definitions/0 - ports: - - name: web - containerPort: 3000 - env: - - name: GF_INSTALL_PLUGINS - value: "grafana-clock-panel,grafana-piechart-panel" - - name: GF_PATHS_PLUGINS - value: "/data/plugins" - resources: - requests: - memory: 100Mi - cpu: 100m - limits: - memory: 200Mi - cpu: 200m - volumes: - - name: grafana-config - configMap: - name: grafana-config - - name: grafana-storage - persistentVolumeClaim: - claimName: grafana-claim - - name: grafana-datasources - configMap: - name: grafana-datasources - - name: grafana-dashboards - configMap: - name: grafana-dashboards - - name: grafana-dashboard-definitions-0 - configMap: - name: grafana-dashboard-definitions-0 diff --git a/manifests-old/grafana/grafana-external-ingress.yaml b/manifests-old/grafana/grafana-external-ingress.yaml deleted file mode 100644 index 7b79f0d..0000000 --- a/manifests-old/grafana/grafana-external-ingress.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: grafana-external - namespace: monitoring - labels: - traffic-type: external - annotations: - traefik.frontend.rule.type: PathPrefix - #traefik.frontend.redirect.regex: ^http://(.*) - #traefik.frontend.redirect.replacement: https://$1 -spec: - rules: - - host: grafana.cloud.carlosedp.com - http: - paths: - - path: / - backend: - serviceName: grafana - servicePort: 3000 diff --git a/manifests-old/grafana/grafana-ingress.yaml b/manifests-old/grafana/grafana-ingress.yaml deleted file mode 100644 index cc55605..0000000 --- a/manifests-old/grafana/grafana-ingress.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: grafana - namespace: monitoring -spec: - rules: - - host: grafana.internal.carlosedp.com - http: - paths: - - path: / - backend: - serviceName: grafana - servicePort: 3000 - diff --git a/manifests-old/grafana/grafana-service.yaml b/manifests-old/grafana/grafana-service.yaml deleted file mode 100644 index fbcee40..0000000 --- a/manifests-old/grafana/grafana-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: grafana - labels: - app: grafana -spec: - type: NodePort - ports: - - port: 3000 - protocol: TCP - nodePort: 30902 - targetPort: web - selector: - app: grafana diff --git a/manifests-old/k8s/kube-controller-manager.yaml b/manifests-old/k8s/kube-controller-manager.yaml deleted file mode 100644 index bd8d7cb..0000000 --- a/manifests-old/k8s/kube-controller-manager.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - namespace: kube-system - name: kube-controller-manager-prometheus-discovery - labels: - k8s-app: kube-controller-manager -spec: - selector: - component: kube-controller-manager - type: ClusterIP - clusterIP: None - ports: - - name: http-metrics - port: 10252 - targetPort: 10252 - protocol: TCP diff --git a/manifests-old/k8s/kube-scheduler.yaml b/manifests-old/k8s/kube-scheduler.yaml deleted file mode 100644 index 2d90097..0000000 --- a/manifests-old/k8s/kube-scheduler.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - namespace: kube-system - name: kube-scheduler-prometheus-discovery - labels: - k8s-app: kube-scheduler -spec: - selector: - component: kube-scheduler - type: ClusterIP - clusterIP: None - ports: - - name: http-metrics - port: 10251 - targetPort: 10251 - protocol: TCP diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-cluster-role-binding.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-cluster-role-binding.yaml deleted file mode 100644 index 9a8f311..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-cluster-role-binding.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kube-state-metrics -subjects: -- kind: ServiceAccount - name: kube-state-metrics - namespace: monitoring diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-cluster-role.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-cluster-role.yaml deleted file mode 100644 index 49de56c..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-cluster-role.yaml +++ /dev/null @@ -1,45 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: kube-state-metrics -rules: -- apiGroups: [""] - resources: - - nodes - - pods - - services - - resourcequotas - - replicationcontrollers - - limitranges - - persistentvolumeclaims - - persistentvolumes - - namespaces - - endpoints - verbs: ["list", "watch"] -- apiGroups: ["extensions"] - resources: - - daemonsets - - deployments - - replicasets - verbs: ["list", "watch"] -- apiGroups: ["apps"] - resources: - - statefulsets - verbs: ["list", "watch"] -- apiGroups: ["batch"] - resources: - - cronjobs - - jobs - verbs: ["list", "watch"] -- apiGroups: ["autoscaling"] - resources: - - horizontalpodautoscalers - verbs: ["list", "watch"] -- apiGroups: ["authentication.k8s.io"] - resources: - - tokenreviews - verbs: ["create"] -- apiGroups: ["authorization.k8s.io"] - resources: - - subjectaccessreviews - verbs: ["create"] diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-deployment.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-deployment.yaml deleted file mode 100644 index 41d450a..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-deployment.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - name: kube-state-metrics -spec: - replicas: 1 - template: - metadata: - labels: - app: kube-state-metrics - spec: - serviceAccountName: kube-state-metrics - securityContext: - runAsNonRoot: true - runAsUser: 65534 - containers: - - name: kube-state-metrics - image: carlosedp/kube-state-metrics:v1.2.0 - args: - - "--host=0.0.0.0" - - "--port=8443" - - "--telemetry-host=0.0.0.0" - - "--telemetry-port=9443" - ports: - - name: http-main - containerPort: 8443 - - name: http-self - containerPort: 9443 - - name: addon-resizer - image: carlosedp/addon-resizer:2.1 - resources: - limits: - cpu: 100m - memory: 30Mi - requests: - cpu: 100m - memory: 30Mi - env: - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - command: - - /pod_nanny - - --container=kube-state-metrics - - --cpu=100m - - --extra-cpu=2m - - --memory=150Mi - - --extra-memory=30Mi - #- --threshold=5 - - --deployment=kube-state-metrics diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-role-binding.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-role-binding.yaml deleted file mode 100644 index ddd64a6..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-role-binding.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: kube-state-metrics-resizer -subjects: -- kind: ServiceAccount - name: kube-state-metrics - diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-role.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-role.yaml deleted file mode 100644 index a07e4f2..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-role.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: kube-state-metrics-resizer -rules: -- apiGroups: [""] - resources: - - pods - verbs: ["get"] -- apiGroups: ["extensions"] - resources: - - deployments - resourceNames: ["kube-state-metrics"] - verbs: ["get", "update"] - diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-service-account.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-service-account.yaml deleted file mode 100644 index 9977935..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-service-account.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: kube-state-metrics diff --git a/manifests-old/kube-state-metrics/kube-state-metrics-service.yaml b/manifests-old/kube-state-metrics/kube-state-metrics-service.yaml deleted file mode 100644 index dfe9d9a..0000000 --- a/manifests-old/kube-state-metrics/kube-state-metrics-service.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app: kube-state-metrics - k8s-app: kube-state-metrics - name: kube-state-metrics -spec: - clusterIP: None - ports: - - name: http-main - port: 8443 - targetPort: http-main - protocol: TCP - - name: http-self - port: 9443 - targetPort: http-self - protocol: TCP - selector: - app: kube-state-metrics - diff --git a/manifests-old/node-exporter/node-exporter-cluster-role-binding.yaml b/manifests-old/node-exporter/node-exporter-cluster-role-binding.yaml deleted file mode 100644 index a5a2050..0000000 --- a/manifests-old/node-exporter/node-exporter-cluster-role-binding.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: node-exporter -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: node-exporter -subjects: -- kind: ServiceAccount - name: node-exporter - namespace: monitoring diff --git a/manifests-old/node-exporter/node-exporter-cluster-role.yaml b/manifests-old/node-exporter/node-exporter-cluster-role.yaml deleted file mode 100644 index 932b776..0000000 --- a/manifests-old/node-exporter/node-exporter-cluster-role.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: node-exporter -rules: -- apiGroups: ["authentication.k8s.io"] - resources: - - tokenreviews - verbs: ["create"] -- apiGroups: ["authorization.k8s.io"] - resources: - - subjectaccessreviews - verbs: ["create"] diff --git a/manifests-old/node-exporter/node-exporter-daemonset.yaml b/manifests-old/node-exporter/node-exporter-daemonset.yaml deleted file mode 100644 index ad4170d..0000000 --- a/manifests-old/node-exporter/node-exporter-daemonset.yaml +++ /dev/null @@ -1,59 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: DaemonSet -metadata: - name: node-exporter -spec: - updateStrategy: - rollingUpdate: - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - labels: - app: node-exporter - name: node-exporter - spec: - serviceAccountName: node-exporter - securityContext: - runAsNonRoot: true - runAsUser: 65534 - hostNetwork: true - hostPID: true - containers: - - image: carlosedp/node_exporter:v0.15.2 - args: - #- "--web.listen-address=0.0.0.1:9100" - - "--path.procfs=/host/proc" - - "--path.sysfs=/host/sys" - - "--collector.filesystem.ignored-mount-points" - - '^(\/(host|root)\/sys\/kernel\/debug\/).*' - name: node-exporter - ports: - - containerPort: 9100 - hostPort: 9100 - name: http - resources: - requests: - memory: 30Mi - cpu: 100m - limits: - memory: 50Mi - cpu: 200m - volumeMounts: - - name: proc - readOnly: true - mountPath: /host/proc - - name: sys - readOnly: true - mountPath: /host/sys - tolerations: - - effect: NoSchedule - operator: Exists - volumes: - - name: proc - hostPath: - path: /proc - - name: sys - hostPath: - path: /sys - diff --git a/manifests-old/node-exporter/node-exporter-service-account.yaml b/manifests-old/node-exporter/node-exporter-service-account.yaml deleted file mode 100644 index 703a274..0000000 --- a/manifests-old/node-exporter/node-exporter-service-account.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: node-exporter diff --git a/manifests-old/node-exporter/node-exporter-service.yaml b/manifests-old/node-exporter/node-exporter-service.yaml deleted file mode 100644 index f9bfc58..0000000 --- a/manifests-old/node-exporter/node-exporter-service.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app: node-exporter - k8s-app: node-exporter - name: node-exporter -spec: - type: ClusterIP - clusterIP: None - ports: - - name: http - port: 9100 - protocol: TCP - selector: - app: node-exporter - diff --git a/manifests-old/prometheus-operator/prometheus-operator-cluster-role-binding.yaml b/manifests-old/prometheus-operator/prometheus-operator-cluster-role-binding.yaml deleted file mode 100644 index e7e03a2..0000000 --- a/manifests-old/prometheus-operator/prometheus-operator-cluster-role-binding.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: prometheus-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus-operator -subjects: -- kind: ServiceAccount - name: prometheus-operator - namespace: monitoring diff --git a/manifests-old/prometheus-operator/prometheus-operator-cluster-role.yaml b/manifests-old/prometheus-operator/prometheus-operator-cluster-role.yaml deleted file mode 100644 index 1b13c89..0000000 --- a/manifests-old/prometheus-operator/prometheus-operator-cluster-role.yaml +++ /dev/null @@ -1,54 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: prometheus-operator -rules: -- apiGroups: - - extensions - resources: - - thirdpartyresources - verbs: - - "*" -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - "*" -- apiGroups: - - monitoring.coreos.com - resources: - - alertmanagers - - prometheuses - - prometheuses/finalizers - - alertmanagers/finalizers - - servicemonitors - verbs: - - "*" -- apiGroups: - - apps - resources: - - statefulsets - verbs: ["*"] -- apiGroups: [""] - resources: - - configmaps - - secrets - verbs: ["*"] -- apiGroups: [""] - resources: - - pods - verbs: ["list", "delete"] -- apiGroups: [""] - resources: - - services - - endpoints - verbs: ["get", "create", "update"] -- apiGroups: [""] - resources: - - nodes - verbs: ["list", "watch"] -- apiGroups: [""] - resources: - - namespaces - verbs: ["list"] diff --git a/manifests-old/prometheus-operator/prometheus-operator-service-account.yaml b/manifests-old/prometheus-operator/prometheus-operator-service-account.yaml deleted file mode 100644 index 38d18cc..0000000 --- a/manifests-old/prometheus-operator/prometheus-operator-service-account.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus-operator diff --git a/manifests-old/prometheus-operator/prometheus-operator-service.yaml b/manifests-old/prometheus-operator/prometheus-operator-service.yaml deleted file mode 100644 index 8882d4a..0000000 --- a/manifests-old/prometheus-operator/prometheus-operator-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: prometheus-operator - labels: - k8s-app: prometheus-operator -spec: - type: ClusterIP - ports: - - name: http - port: 8080 - targetPort: http - protocol: TCP - selector: - k8s-app: prometheus-operator diff --git a/manifests-old/prometheus-operator/prometheus-operator.yaml b/manifests-old/prometheus-operator/prometheus-operator.yaml deleted file mode 100644 index 6308cad..0000000 --- a/manifests-old/prometheus-operator/prometheus-operator.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - labels: - k8s-app: prometheus-operator - name: prometheus-operator -spec: - replicas: 1 - template: - metadata: - labels: - k8s-app: prometheus-operator - spec: - containers: - - args: - - --kubelet-service=kube-system/kubelet - - --prometheus-config-reloader=carlosedp/prometheus-config-reloader:v0.0.2 - - --config-reloader-image=carlosedp/configmap-reload:v0.2.2-arm - image: carlosedp/prometheus-operator:v0.17.0 - name: prometheus-operator - ports: - - containerPort: 8080 - name: http - resources: - limits: - cpu: 200m - memory: 100Mi - requests: - cpu: 100m - memory: 50Mi - securityContext: - runAsNonRoot: true - runAsUser: 65534 - serviceAccountName: prometheus-operator diff --git a/manifests-old/prometheus/prometheus-k8s-ingress.yaml b/manifests-old/prometheus/prometheus-k8s-ingress.yaml deleted file mode 100644 index 7dccd73..0000000 --- a/manifests-old/prometheus/prometheus-k8s-ingress.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Ingress -metadata: - name: prometheus - namespace: monitoring -spec: - rules: - - host: prometheus.internal.carlosedp.com - http: - paths: - - path: / - backend: - serviceName: prometheus-k8s - servicePort: web diff --git a/manifests-old/prometheus/prometheus-k8s-role-bindings.yaml b/manifests-old/prometheus/prometheus-k8s-role-bindings.yaml deleted file mode 100644 index 5f190e7..0000000 --- a/manifests-old/prometheus/prometheus-k8s-role-bindings.yaml +++ /dev/null @@ -1,54 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: prometheus-k8s - namespace: monitoring -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prometheus-k8s -subjects: -- kind: ServiceAccount - name: prometheus-k8s - namespace: monitoring ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: prometheus-k8s - namespace: kube-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prometheus-k8s -subjects: -- kind: ServiceAccount - name: prometheus-k8s - namespace: monitoring ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: prometheus-k8s - namespace: default -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prometheus-k8s -subjects: -- kind: ServiceAccount - name: prometheus-k8s - namespace: monitoring ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRoleBinding -metadata: - name: prometheus-k8s -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus-k8s -subjects: -- kind: ServiceAccount - name: prometheus-k8s - namespace: monitoring diff --git a/manifests-old/prometheus/prometheus-k8s-roles.yaml b/manifests-old/prometheus/prometheus-k8s-roles.yaml deleted file mode 100644 index 4f738e7..0000000 --- a/manifests-old/prometheus/prometheus-k8s-roles.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: Role -metadata: - name: prometheus-k8s - namespace: monitoring -rules: -- apiGroups: [""] - resources: - - nodes - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: - - configmaps - verbs: ["get"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: Role -metadata: - name: prometheus-k8s - namespace: kube-system -rules: -- apiGroups: [""] - resources: - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: Role -metadata: - name: prometheus-k8s - namespace: default -rules: -- apiGroups: [""] - resources: - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: ClusterRole -metadata: - name: prometheus-k8s -rules: -- apiGroups: [""] - resources: - - nodes/metrics - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] diff --git a/manifests-old/prometheus/prometheus-k8s-rules.yaml b/manifests-old/prometheus/prometheus-k8s-rules.yaml deleted file mode 100644 index d563a57..0000000 --- a/manifests-old/prometheus/prometheus-k8s-rules.yaml +++ /dev/null @@ -1,602 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: prometheus-k8s-rules - labels: - role: prometheus-rulefiles - prometheus: k8s -data: - alertmanager.rules.yaml: |+ - groups: - - name: alertmanager.rules - rules: - - alert: AlertmanagerConfigInconsistent - expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service) - GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas, "service", - "alertmanager-$1", "alertmanager", "(.*)") != 1 - for: 5m - labels: - severity: critical - annotations: - description: The configuration of the instances of the Alertmanager cluster - `{{$labels.service}}` are out of sync. - - alert: AlertmanagerDownOrMissing - expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", - "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1 - for: 5m - labels: - severity: warning - annotations: - description: An unexpected number of Alertmanagers are scraped or Alertmanagers - disappeared from discovery. - - alert: AlertmanagerFailedReload - expr: alertmanager_config_last_reload_successful == 0 - for: 10m - labels: - severity: warning - annotations: - description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace - }}/{{ $labels.pod}}. - etcd3.rules.yaml: |+ - groups: - - name: ./etcd3.rules - rules: - - alert: InsufficientMembers - expr: count(up{job="etcd"} == 0) > (count(up{job="etcd"}) / 2 - 1) - for: 3m - labels: - severity: critical - annotations: - description: If one more etcd member goes down the cluster will be unavailable - summary: etcd cluster insufficient members - - alert: NoLeader - expr: etcd_server_has_leader{job="etcd"} == 0 - for: 1m - labels: - severity: critical - annotations: - description: etcd member {{ $labels.instance }} has no leader - summary: etcd member has no leader - - alert: HighNumberOfLeaderChanges - expr: increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3 - labels: - severity: warning - annotations: - description: etcd instance {{ $labels.instance }} has seen {{ $value }} leader - changes within the last hour - summary: a high number of leader changes within the etcd cluster are happening - - alert: HighNumberOfFailedGRPCRequests - expr: sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method) - / sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method) > 0.01 - for: 10m - labels: - severity: warning - annotations: - description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed - on etcd instance {{ $labels.instance }}' - summary: a high number of gRPC requests are failing - - alert: HighNumberOfFailedGRPCRequests - expr: sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method) - / sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method) > 0.05 - for: 5m - labels: - severity: critical - annotations: - description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed - on etcd instance {{ $labels.instance }}' - summary: a high number of gRPC requests are failing - - alert: GRPCRequestsSlow - expr: histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job="etcd",grpc_type="unary"}[5m])) by (grpc_service, grpc_method, le)) - > 0.15 - for: 10m - labels: - severity: critical - annotations: - description: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method - }} are slow - summary: slow gRPC requests - - alert: HighNumberOfFailedHTTPRequests - expr: sum(rate(etcd_http_failed_total{job="etcd"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job="etcd"}[5m])) - BY (method) > 0.01 - for: 10m - labels: - severity: warning - annotations: - description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd - instance {{ $labels.instance }}' - summary: a high number of HTTP requests are failing - - alert: HighNumberOfFailedHTTPRequests - expr: sum(rate(etcd_http_failed_total{job="etcd"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job="etcd"}[5m])) - BY (method) > 0.05 - for: 5m - labels: - severity: critical - annotations: - description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd - instance {{ $labels.instance }}' - summary: a high number of HTTP requests are failing - - alert: HTTPRequestsSlow - expr: histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) - > 0.15 - for: 10m - labels: - severity: warning - annotations: - description: on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method - }} are slow - summary: slow HTTP requests - - alert: EtcdMemberCommunicationSlow - expr: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) - > 0.15 - for: 10m - labels: - severity: warning - annotations: - description: etcd instance {{ $labels.instance }} member communication with - {{ $labels.To }} is slow - summary: etcd member communication is slow - - alert: HighNumberOfFailedProposals - expr: increase(etcd_server_proposals_failed_total{job="etcd"}[1h]) > 5 - labels: - severity: warning - annotations: - description: etcd instance {{ $labels.instance }} has seen {{ $value }} proposal - failures within the last hour - summary: a high number of proposals within the etcd cluster are failing - - alert: HighFsyncDurations - expr: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) - > 0.5 - for: 10m - labels: - severity: warning - annotations: - description: etcd instance {{ $labels.instance }} fync durations are high - summary: high fsync durations - - alert: HighCommitDurations - expr: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) - > 0.25 - for: 10m - labels: - severity: warning - annotations: - description: etcd instance {{ $labels.instance }} commit durations are high - summary: high commit durations - general.rules.yaml: |+ - groups: - - name: general.rules - rules: - - alert: TargetDown - expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10 - for: 10m - labels: - severity: warning - annotations: - description: '{{ $value }}% of {{ $labels.job }} targets are down.' - summary: Targets are down - - alert: DeadMansSwitch - expr: vector(1) - labels: - severity: none - annotations: - description: This is a DeadMansSwitch meant to ensure that the entire Alerting - pipeline is functional. - summary: Alerting DeadMansSwitch - - record: fd_utilization - expr: process_open_fds / process_max_fds - - alert: FdExhaustionClose - expr: predict_linear(fd_utilization[1h], 3600 * 4) > 1 - for: 10m - labels: - severity: warning - annotations: - description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance - will exhaust in file/socket descriptors within the next 4 hours' - summary: file descriptors soon exhausted - - alert: FdExhaustionClose - expr: predict_linear(fd_utilization[10m], 3600) > 1 - for: 10m - labels: - severity: critical - annotations: - description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance - will exhaust in file/socket descriptors within the next hour' - summary: file descriptors soon exhausted - kube-controller-manager.rules.yaml: |+ - groups: - - name: kube-controller-manager.rules - rules: - - alert: K8SControllerManagerDown - expr: absent(up{job="kube-controller-manager"} == 1) - for: 5m - labels: - severity: critical - annotations: - description: There is no running K8S controller manager. Deployments and replication - controllers are not making progress. - runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager - summary: Controller manager is down - kube-scheduler.rules.yaml: |+ - groups: - - name: kube-scheduler.rules - rules: - - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile - expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.99" - - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile - expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.9" - - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile - expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.5" - - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile - expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.99" - - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile - expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.9" - - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile - expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.5" - - record: cluster:scheduler_binding_latency_seconds:quantile - expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.99" - - record: cluster:scheduler_binding_latency_seconds:quantile - expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.9" - - record: cluster:scheduler_binding_latency_seconds:quantile - expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket) - BY (le, cluster)) / 1e+06 - labels: - quantile: "0.5" - - alert: K8SSchedulerDown - expr: absent(up{job="kube-scheduler"} == 1) - for: 5m - labels: - severity: critical - annotations: - description: There is no running K8S scheduler. New pods are not being assigned - to nodes. - runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler - summary: Scheduler is down - kube-state-metrics.rules.yaml: |+ - groups: - - name: kube-state-metrics.rules - rules: - - alert: DeploymentGenerationMismatch - expr: kube_deployment_status_observed_generation != kube_deployment_metadata_generation - for: 15m - labels: - severity: warning - annotations: - description: Observed deployment generation does not match expected one for - deployment {{$labels.namespaces}}/{{$labels.deployment}} - summary: Deployment is outdated - - alert: DeploymentReplicasNotUpdated - expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas) - or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas)) - unless (kube_deployment_spec_paused == 1) - for: 15m - labels: - severity: warning - annotations: - description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}} - summary: Deployment replicas are outdated - - alert: DaemonSetRolloutStuck - expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled - * 100 < 100 - for: 15m - labels: - severity: warning - annotations: - description: Only {{$value}}% of desired pods scheduled and ready for daemon - set {{$labels.namespaces}}/{{$labels.daemonset}} - summary: DaemonSet is missing pods - - alert: K8SDaemonSetsNotScheduled - expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled - > 0 - for: 10m - labels: - severity: warning - annotations: - description: A number of daemonsets are not scheduled. - summary: Daemonsets are not scheduled correctly - - alert: DaemonSetsMissScheduled - expr: kube_daemonset_status_number_misscheduled > 0 - for: 10m - labels: - severity: warning - annotations: - description: A number of daemonsets are running where they are not supposed - to run. - summary: Daemonsets are not scheduled correctly - - alert: PodFrequentlyRestarting - expr: increase(kube_pod_container_status_restarts_total[1h]) > 5 - for: 10m - labels: - severity: warning - annotations: - description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}} - times within the last hour - summary: Pod is restarting frequently - kubelet.rules.yaml: |+ - groups: - - name: kubelet.rules - rules: - - alert: K8SNodeNotReady - expr: kube_node_status_condition{condition="Ready",status="true"} == 0 - for: 1h - labels: - severity: warning - annotations: - description: The Kubelet on {{ $labels.node }} has not checked in with the API, - or has set itself to NotReady, for more than an hour - summary: Node status is NotReady - - alert: K8SManyNodesNotReady - expr: count(kube_node_status_condition{condition="Ready",status="true"} == 0) - > 1 and (count(kube_node_status_condition{condition="Ready",status="true"} == - 0) / count(kube_node_status_condition{condition="Ready",status="true"})) > 0.2 - for: 1m - labels: - severity: critical - annotations: - description: '{{ $value }}% of Kubernetes nodes are not ready' - - alert: K8SKubeletDown - expr: count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 3 - for: 1h - labels: - severity: warning - annotations: - description: Prometheus failed to scrape {{ $value }}% of kubelets. - - alert: K8SKubeletDown - expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"})) - * 100 > 1 - for: 1h - labels: - severity: critical - annotations: - description: Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets - have disappeared from service discovery. - summary: Many Kubelets cannot be scraped - - alert: K8SKubeletTooManyPods - expr: kubelet_running_pod_count > 100 - for: 10m - labels: - severity: warning - annotations: - description: Kubelet {{$labels.instance}} is running {{$value}} pods, close - to the limit of 110 - summary: Kubelet is close to pod limit - kubernetes.rules.yaml: |+ - groups: - - name: kubernetes.rules - rules: - - record: pod_name:container_memory_usage_bytes:sum - expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY - (pod_name) - - record: pod_name:container_spec_cpu_shares:sum - expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) BY (pod_name) - - record: pod_name:container_cpu_usage:sum - expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m])) - BY (pod_name) - - record: pod_name:container_fs_usage_bytes:sum - expr: sum(container_fs_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name) - - record: namespace:container_memory_usage_bytes:sum - expr: sum(container_memory_usage_bytes{container_name!=""}) BY (namespace) - - record: namespace:container_spec_cpu_shares:sum - expr: sum(container_spec_cpu_shares{container_name!=""}) BY (namespace) - - record: namespace:container_cpu_usage:sum - expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m])) - BY (namespace) - - record: cluster:memory_usage:ratio - expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY - (cluster) / sum(machine_memory_bytes) BY (cluster) - - record: cluster:container_spec_cpu_shares:ratio - expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) / 1000 - / sum(machine_cpu_cores) - - record: cluster:container_cpu_usage:ratio - expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m])) - / sum(machine_cpu_cores) - - record: apiserver_latency_seconds:quantile - expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) / - 1e+06 - labels: - quantile: "0.99" - - record: apiserver_latency:quantile_seconds - expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) / - 1e+06 - labels: - quantile: "0.9" - - record: apiserver_latency_seconds:quantile - expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) / - 1e+06 - labels: - quantile: "0.5" - - alert: APIServerLatencyHigh - expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} - > 1 - for: 10m - labels: - severity: warning - annotations: - description: the API server has a 99th percentile latency of {{ $value }} seconds - for {{$labels.verb}} {{$labels.resource}} - - alert: APIServerLatencyHigh - expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} - > 4 - for: 10m - labels: - severity: critical - annotations: - description: the API server has a 99th percentile latency of {{ $value }} seconds - for {{$labels.verb}} {{$labels.resource}} - - alert: APIServerErrorsHigh - expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m]) - * 100 > 2 - for: 10m - labels: - severity: warning - annotations: - description: API server returns errors for {{ $value }}% of requests - - alert: APIServerErrorsHigh - expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m]) - * 100 > 5 - for: 10m - labels: - severity: critical - annotations: - description: API server returns errors for {{ $value }}% of requests - - alert: K8SApiserverDown - expr: absent(up{job="apiserver"} == 1) - for: 20m - labels: - severity: critical - annotations: - description: No API servers are reachable or all have disappeared from service - discovery - - - alert: K8sCertificateExpirationNotice - labels: - severity: warning - annotations: - description: Kubernetes API Certificate is expiring soon (less than 7 days) - expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0 - - - alert: K8sCertificateExpirationNotice - labels: - severity: critical - annotations: - description: Kubernetes API Certificate is expiring in less than 1 day - expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0 - node.rules.yaml: |+ - groups: - - name: node.rules - rules: - - record: instance:node_cpu:rate:sum - expr: sum(rate(node_cpu{mode!="idle",mode!="iowait",mode!~"^(?:guest.*)$"}[3m])) - BY (instance) - - record: instance:node_filesystem_usage:sum - expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"})) - BY (instance) - - record: instance:node_network_receive_bytes:rate:sum - expr: sum(rate(node_network_receive_bytes[3m])) BY (instance) - - record: instance:node_network_transmit_bytes:rate:sum - expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance) - - record: instance:node_cpu:ratio - expr: sum(rate(node_cpu{mode!="idle"}[5m])) WITHOUT (cpu, mode) / ON(instance) - GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance) - - record: cluster:node_cpu:sum_rate5m - expr: sum(rate(node_cpu{mode!="idle"}[5m])) - - record: cluster:node_cpu:ratio - expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu)) - - alert: NodeExporterDown - expr: absent(up{job="node-exporter"} == 1) - for: 10m - labels: - severity: warning - annotations: - description: Prometheus could not scrape a node-exporter for more than 10m, - or node-exporters have disappeared from discovery - - alert: NodeDiskRunningFull - expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0 - for: 30m - labels: - severity: warning - annotations: - description: device {{$labels.device}} on node {{$labels.instance}} is running - full within the next 24 hours (mounted at {{$labels.mountpoint}}) - - alert: NodeDiskRunningFull - expr: predict_linear(node_filesystem_free[30m], 3600 * 2) < 0 - for: 10m - labels: - severity: critical - annotations: - description: device {{$labels.device}} on node {{$labels.instance}} is running - full within the next 2 hours (mounted at {{$labels.mountpoint}}) - prometheus.rules.yaml: |+ - groups: - - name: prometheus.rules - rules: - - alert: PrometheusConfigReloadFailed - expr: prometheus_config_last_reload_successful == 0 - for: 10m - labels: - severity: warning - annotations: - description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}} - - alert: PrometheusNotificationQueueRunningFull - expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity - for: 10m - labels: - severity: warning - annotations: - description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ - $labels.pod}} - - alert: PrometheusErrorSendingAlerts - expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m]) - > 0.01 - for: 10m - labels: - severity: warning - annotations: - description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ - $labels.pod}} to Alertmanager {{$labels.Alertmanager}} - - alert: PrometheusErrorSendingAlerts - expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m]) - > 0.03 - for: 10m - labels: - severity: critical - annotations: - description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ - $labels.pod}} to Alertmanager {{$labels.Alertmanager}} - - alert: PrometheusNotConnectedToAlertmanagers - expr: prometheus_notifications_alertmanagers_discovered < 1 - for: 10m - labels: - severity: warning - annotations: - description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected - to any Alertmanagers - - alert: PrometheusTSDBReloadsFailing - expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0 - for: 12h - labels: - severity: warning - annotations: - description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} - reload failures over the last four hours.' - summary: Prometheus has issues reloading data blocks from disk - - alert: PrometheusTSDBCompactionsFailing - expr: increase(prometheus_tsdb_compactions_failed_total[2h]) > 0 - for: 12h - labels: - severity: warning - annotations: - description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} - compaction failures over the last four hours.' - summary: Prometheus has issues compacting sample blocks - - alert: PrometheusTSDBWALCorruptions - expr: tsdb_wal_corruptions_total > 0 - for: 4h - labels: - severity: warning - annotations: - description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead - log (WAL).' - summary: Prometheus write-ahead log is corrupted diff --git a/manifests-old/prometheus/prometheus-k8s-service-account.yaml b/manifests-old/prometheus/prometheus-k8s-service-account.yaml deleted file mode 100644 index 58d5342..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-account.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus-k8s diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-alertmanager.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-alertmanager.yaml deleted file mode 100644 index 19669e3..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-alertmanager.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: alertmanager - labels: - k8s-app: alertmanager -spec: - selector: - matchLabels: - alertmanager: main - namespaceSelector: - matchNames: - - monitoring - endpoints: - - port: web - interval: 30s diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-apiserver.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-apiserver.yaml deleted file mode 100644 index 40361f0..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-apiserver.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kube-apiserver - labels: - k8s-app: apiserver -spec: - jobLabel: component - selector: - matchLabels: - component: apiserver - provider: kubernetes - namespaceSelector: - matchNames: - - default - endpoints: - - port: https - interval: 30s - scheme: https - tlsConfig: - caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - serverName: kubernetes - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-arm-exporter.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-arm-exporter.yaml deleted file mode 100644 index b7d4cb2..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-arm-exporter.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: arm-exporter - namespace: monitoring - labels: - k8s-app: arm-exporter -spec: - jobLabel: k8s-app - selector: - matchLabels: - k8s-app: arm-exporter - namespaceSelector: - matchNames: - - monitoring - endpoints: - - port: http - interval: 30s diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-controller-manager.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-controller-manager.yaml deleted file mode 100644 index 681c320..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-controller-manager.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kube-controller-manager - labels: - k8s-app: kube-controller-manager -spec: - jobLabel: k8s-app - endpoints: - - port: http-metrics - interval: 30s - selector: - matchLabels: - k8s-app: kube-controller-manager - namespaceSelector: - matchNames: - - kube-system diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-scheduler.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-scheduler.yaml deleted file mode 100644 index 6927f58..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-scheduler.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kube-scheduler - labels: - k8s-app: kube-scheduler -spec: - jobLabel: k8s-app - endpoints: - - port: http-metrics - interval: 30s - selector: - matchLabels: - k8s-app: kube-scheduler - namespaceSelector: - matchNames: - - kube-system diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-state-metrics.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-state-metrics.yaml deleted file mode 100644 index 62174b4..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-kube-state-metrics.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kube-state-metrics - labels: - k8s-app: kube-state-metrics -spec: - jobLabel: k8s-app - selector: - matchLabels: - k8s-app: kube-state-metrics - namespaceSelector: - matchNames: - - monitoring - endpoints: - - port: http-main - #scheme: https - interval: 30s - honorLabels: true - #bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - #tlsConfig: - # insecureSkipVerify: true - - port: http-self - # scheme: https - interval: 30s - # bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - # tlsConfig: - # insecureSkipVerify: true diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-kubelet.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-kubelet.yaml deleted file mode 100644 index 16c9752..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-kubelet.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kubelet - labels: - k8s-app: kubelet -spec: - jobLabel: k8s-app - endpoints: - - port: https-metrics - scheme: https - interval: 30s - tlsConfig: - insecureSkipVerify: true - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - - port: https-metrics - scheme: https - path: /metrics/cadvisor - interval: 30s - honorLabels: true - tlsConfig: - insecureSkipVerify: true - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - selector: - matchLabels: - k8s-app: kubelet - namespaceSelector: - matchNames: - - kube-system diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-node-exporter.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-node-exporter.yaml deleted file mode 100644 index 4c78ea1..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-node-exporter.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: node-exporter - labels: - k8s-app: node-exporter -spec: - jobLabel: k8s-app - selector: - matchLabels: - k8s-app: node-exporter - namespaceSelector: - matchNames: - - monitoring - endpoints: - - port: http - #scheme: http - interval: 30s - #bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - #tlsConfig: - # insecureSkipVerify: true diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-prometheus-operator.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-prometheus-operator.yaml deleted file mode 100644 index 0b8028e..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-prometheus-operator.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus-operator - labels: - k8s-app: prometheus-operator -spec: - endpoints: - - port: http - selector: - matchLabels: - k8s-app: prometheus-operator diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-prometheus.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-prometheus.yaml deleted file mode 100644 index c3d11e5..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-prometheus.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: prometheus - labels: - k8s-app: prometheus -spec: - selector: - matchLabels: - prometheus: k8s - namespaceSelector: - matchNames: - - monitoring - endpoints: - - port: web - interval: 30s diff --git a/manifests-old/prometheus/prometheus-k8s-service-monitor-traefik.yaml b/manifests-old/prometheus/prometheus-k8s-service-monitor-traefik.yaml deleted file mode 100644 index a415768..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service-monitor-traefik.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: traefik-ingress-lb - labels: - k8s-app: traefik-ingress-lb -spec: - jobLabel: k8s-app - endpoints: - - port: admin - interval: 30s - selector: - matchLabels: - k8s-app: traefik-ingress-lb - namespaceSelector: - matchNames: - - kube-system diff --git a/manifests-old/prometheus/prometheus-k8s-service.yaml b/manifests-old/prometheus/prometheus-k8s-service.yaml deleted file mode 100644 index 5cd3b65..0000000 --- a/manifests-old/prometheus/prometheus-k8s-service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - prometheus: k8s - name: prometheus-k8s -spec: - type: NodePort - ports: - - name: web - nodePort: 30900 - port: 9090 - protocol: TCP - targetPort: web - selector: - prometheus: k8s diff --git a/manifests-old/prometheus/prometheus-k8s.yaml b/manifests-old/prometheus/prometheus-k8s.yaml deleted file mode 100644 index 90a05b3..0000000 --- a/manifests-old/prometheus/prometheus-k8s.yaml +++ /dev/null @@ -1,39 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus -metadata: - name: k8s - labels: - prometheus: k8s -spec: - replicas: 1 - baseImage: carlosedp/prometheus - version: v2.2.1 - retention: "168h" - serviceAccountName: prometheus-k8s - serviceMonitorSelector: - matchExpressions: - - {key: k8s-app, operator: Exists} - ruleSelector: - matchLabels: - role: prometheus-rulefiles - prometheus: k8s - resources: - requests: - # 2Gi is default, but won't schedule if you don't have a node with >2Gi - # memory. Modify based on your target and time-series count for - # production use. This value is mainly meant for demonstration/testing - # purposes. - memory: 512Mi - alerting: - alertmanagers: - - namespace: monitoring - name: alertmanager-main - port: web - storage: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 50Gi diff --git a/manifests-old/smtp-server/create_secret.sh b/manifests-old/smtp-server/create_secret.sh deleted file mode 100755 index f29ae38..0000000 --- a/manifests-old/smtp-server/create_secret.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -echo "Please enter your Gmail account"; -read username; - -echo "Please enter your Gmail password"; -read -s password; - -echo "Creating secret" -kubectl create secret generic smtp-account -n monitoring --from-literal=username=${username} --from-literal=password=${password} diff --git a/manifests-old/smtp-server/smtp.yaml b/manifests-old/smtp-server/smtp.yaml deleted file mode 100644 index b2f6f16..0000000 --- a/manifests-old/smtp-server/smtp.yaml +++ /dev/null @@ -1,68 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: Deployment -metadata: - annotations: - deployment.kubernetes.io/revision: "1" - generation: 1 - labels: - run: smtp-server - name: smtp-server - namespace: monitoring -spec: - replicas: 1 - selector: - matchLabels: - run: smtp-server - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - creationTimestamp: null - labels: - run: smtp-server - spec: - containers: - - image: carlosedp/docker-smtp - imagePullPolicy: Always - name: smtp-server - ports: - - containerPort: 25 - protocol: TCP - env: - - name: GMAIL_USER - valueFrom: - secretKeyRef: - name: smtp-account - key: username - - name: GMAIL_PASSWORD - valueFrom: - secretKeyRef: - name: smtp-account - key: password - - name: DISABLE_IPV6 - value: "True" - - name: RELAY_DOMAINS - value: ":192.168.0.0/24:10.0.0.0/16" - resources: {} - restartPolicy: Always - replicas: 1 ---- -apiVersion: v1 -kind: Service -metadata: - labels: - run: smtp-server - name: smtp-server - namespace: monitoring -spec: - ports: - - nodePort: 30025 - port: 25 - protocol: TCP - targetPort: 25 - selector: - run: smtp-server - type: NodePort diff --git a/manifests-old/snmp-exporter/snmp-exporter-deployment.yaml b/manifests-old/snmp-exporter/snmp-exporter-deployment.yaml deleted file mode 100644 index 3e117bb..0000000 --- a/manifests-old/snmp-exporter/snmp-exporter-deployment.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: apps/v1beta1 -kind: Deployment -metadata: - name: snmp-exporter -spec: - replicas: 1 - selector: - matchLabels: - app: snmp-exporter - template: - metadata: - labels: - app: snmp-exporter - spec: - containers: - - image: carlosedp/snmp_exporter:v0.9.0 - name: snmp-exporter - ports: - - containerPort: 9116 - name: metrics diff --git a/manifests-old/snmp-exporter/snmp-exporter-service.yaml b/manifests-old/snmp-exporter/snmp-exporter-service.yaml deleted file mode 100644 index 91ebec1..0000000 --- a/manifests-old/snmp-exporter/snmp-exporter-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - labels: - app: snmp-exporter - name: snmp-exporter -spec: - ports: - - name: http-metrics - port: 9116 - protocol: TCP - targetPort: metrics - selector: - app: snmp-exporter - diff --git a/manifests-old/snmp-exporter/snmp-exporter-servicemonitor-router.yaml b/manifests-old/snmp-exporter/snmp-exporter-servicemonitor-router.yaml deleted file mode 100644 index a90f5d7..0000000 --- a/manifests-old/snmp-exporter/snmp-exporter-servicemonitor-router.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: snmp-exporter - name: snmp-exporter -spec: - jobLabel: k8s-app - selector: - app: snmp-exporter - namespaceSelector: - matchNames: - - monitoring - - endpoints: - - interval: 60s - port: http-metrics - params: - module: - - ddwrt - target: - - 192.168.1.1 - path: "/snmp" - targetPort: 9116