From f2bbdfec9856cb3d6f8c19dc042274af46be2ac5 Mon Sep 17 00:00:00 2001 From: CarlosEDP Date: Wed, 27 Feb 2019 09:29:08 -0300 Subject: [PATCH] Regenerate manifests --- manifests/elasticexporter-deployment.yaml | 73 + manifests/elasticexporter-service.yaml | 14 + ...elasticexporter-serviceMonitorElastic.yaml | 19 + ...elasticexporter-serviceMonitorFluentd.yaml | 19 + manifests/grafana-dashboardDefinitions.yaml | 3455 ++++++++++++++++- manifests/grafana-deployment.yaml | 12 + manifests/node-exporter-daemonset.yaml | 4 +- manifests/prometheus-adapter-apiService.yaml | 2 +- ...prometheus-adapter-clusterRoleBinding.yaml | 1 - manifests/prometheus-rules.yaml | 26 + manifests/ups-exporter-endpoints.yaml | 14 + manifests/ups-exporter-service.yaml | 14 + manifests/ups-exporter-serviceMonitor.yaml | 16 + 13 files changed, 3651 insertions(+), 18 deletions(-) create mode 100644 manifests/elasticexporter-deployment.yaml create mode 100644 manifests/elasticexporter-service.yaml create mode 100644 manifests/elasticexporter-serviceMonitorElastic.yaml create mode 100644 manifests/elasticexporter-serviceMonitorFluentd.yaml create mode 100644 manifests/ups-exporter-endpoints.yaml create mode 100644 manifests/ups-exporter-service.yaml create mode 100644 manifests/ups-exporter-serviceMonitor.yaml diff --git a/manifests/elasticexporter-deployment.yaml b/manifests/elasticexporter-deployment.yaml new file mode 100644 index 0000000..54c60e1 --- /dev/null +++ b/manifests/elasticexporter-deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1beta2 +kind: Deployment +metadata: + labels: + k8s-app: elasticsearch-exporter + name: elasticsearch-exporter + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + k8s-app: elasticsearch-exporter + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + type: RollingUpdate + template: + metadata: + labels: + k8s-app: elasticsearch-exporter + spec: + containers: + - command: + - /bin/elasticsearch_exporter + - -es.uri=http://elasticsearch.logging.svc:9200 + - -es.timeout=60s + - -es.all=true + image: carlosedp/elasticsearch-exporter:1.0.4rc1 + livenessProbe: + httpGet: + path: /health + port: 9108 + initialDelaySeconds: 30 + timeoutSeconds: 10 + name: elasticsearch-exporter + ports: + - containerPort: 9108 + name: es-metrics + readinessProbe: + httpGet: + path: /health + port: 9108 + initialDelaySeconds: 30 + timeoutSeconds: 10 + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 25m + memory: 64Mi + securityContext: + capabilities: + drop: + - SETPCAP + - MKNOD + - AUDIT_WRITE + - CHOWN + - NET_RAW + - DAC_OVERRIDE + - FOWNER + - FSETID + - KILL + - SETGID + - SETUID + - NET_BIND_SERVICE + - SYS_CHROOT + - SETFCAP + readOnlyRootFilesystem: "true" + runAsNonRoot: true + runAsUser: 1000 + restartPolicy: Always diff --git a/manifests/elasticexporter-service.yaml b/manifests/elasticexporter-service.yaml new file mode 100644 index 0000000..6049460 --- /dev/null +++ b/manifests/elasticexporter-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: elasticsearch-exporter + name: elasticsearch-exporter + namespace: monitoring +spec: + ports: + - name: es-metrics + port: 9108 + targetPort: es-metrics + selector: + k8s-app: elasticsearch-exporter diff --git a/manifests/elasticexporter-serviceMonitorElastic.yaml b/manifests/elasticexporter-serviceMonitorElastic.yaml new file mode 100644 index 0000000..ed5bf1d --- /dev/null +++ b/manifests/elasticexporter-serviceMonitorElastic.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: elasticsearch-exporter + name: elasticsearch-exporter + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: es-metrics + scheme: http + jobLabel: k8s-app + namespaceSelector: + matchNames: + - monitoring + selector: + matchLabels: + k8s-app: elasticsearch-exporter diff --git a/manifests/elasticexporter-serviceMonitorFluentd.yaml b/manifests/elasticexporter-serviceMonitorFluentd.yaml new file mode 100644 index 0000000..858274a --- /dev/null +++ b/manifests/elasticexporter-serviceMonitorFluentd.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: fluentd-es + name: fluentd-es + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + jobLabel: k8s-app + namespaceSelector: + matchNames: + - logging + selector: + matchLabels: + k8s-app: fluentd-es diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 610ffd5..291d70e 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -1,5 +1,803 @@ apiVersion: v1 items: +- apiVersion: v1 + data: + apc-ups-dashboard.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard for single APC UPS", + "editable": true, + "gnetId": 4191, + "graphTooltip": 0, + "id": 14, + "links": [ + + ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": true, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 6, + "interval": null, + "isNew": true, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apcups_battery_charge_percent", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": "25,50", + "title": "Battery Charge", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 6, + "y": 0 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "apcups_battery_charge_percent", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Battery", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Charge Percent", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "prometheus", + "format": "m", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apcups_time_left_seconds / 60", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "2,5", + "title": "Time on Battery", + "type": "singlestat", + "valueFontSize": "150%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 7 + }, + "id": 8, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apcups_status == 1", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ status }}", + "refId": "A" + } + ], + "thresholds": "", + "title": "UPS Status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "name" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 18, + "x": 6, + "y": 7 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "apcups_line_nominal_volts", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Nominal", + "refId": "A" + }, + { + "expr": "apcups_line_volts", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Actual", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Line Voltage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "volt", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 18, + "x": 0, + "y": 14 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "apcups_load_percent", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "Percent", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 14 + }, + "id": 10, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " W", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apcups_nominal_power_watts*(apcups_load_percent/100)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Current UPS Load", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 17 + }, + "id": 11, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " W", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "apcups_nominal_power_watts*(apcups_load_percent/100)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Average UPS Load", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "refresh": "5m", + "schemaVersion": 16, + "style": "dark", + "tags": [ + + ], + "templating": { + "list": [ + + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "UPS", + "uid": "CZrVv8rmk", + "version": 4 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-apc-ups-dashboard + namespace: monitoring - apiVersion: v1 data: coredns-dashboard.json: |- @@ -1479,6 +2277,2455 @@ items: metadata: name: grafana-dashboard-coredns-dashboard namespace: monitoring +- apiVersion: v1 + data: + elasticsearch-dashboard.json: |- + { + "__inputs": [ + + ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.4.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1549021227642, + "links": [ + + ], + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 90, + "title": "Cluster", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorPostfix": false, + "colorPrefix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 92, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "Value", + "targets": [ + { + "expr": "scalar(elasticsearch_cluster_health_status{color=\"green\",cluster=\u007e\"$cluster\"}) + scalar(elasticsearch_cluster_health_status{color=\"yellow\",cluster=\u007e\"$cluster\"}) * 2 + scalar(elasticsearch_cluster_health_status{color=\"red\",cluster=\u007e\"$cluster\"}) * 3", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "2,3", + "title": "Cluster Status", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "green", + "value": "1" + }, + { + "op": "=", + "text": "yellow", + "value": "2" + }, + { + "op": "=", + "text": "red", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$server", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 8, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(elasticsearch_cluster_health_number_of_nodes{cluster=\u007e\"$cluster\"})/count(elasticsearch_cluster_health_number_of_nodes{cluster=\u007e\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "elasticsearch_cluster_health_number_of_nodes", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "timeFrom": null, + "timeShift": null, + "title": "Running Nodes", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 94, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_number_of_data_nodes{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Active Data Nodes", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 96, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_number_of_pending_tasks{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "Pending Tasks", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 76, + "panels": [ + + ], + "title": "Shards", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 78, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_active_shards{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Active Shards", + "transparent": false, + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 5 + }, + "id": 80, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_active_primary_shards{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Active Primary Shards", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 5 + }, + "id": 82, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_initializing_shards{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "Initializing Shards", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 5 + }, + "id": 84, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_relocating_shards{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "Relocating Shards", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 5 + }, + "id": 86, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_unassigned_shards{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "Unassigned Shards", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 5 + }, + "id": 88, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "elasticsearch_cluster_health_delayed_unassigned_shards{cluster=\u007e\"$cluster\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "Delayed Unassigned Shards", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 70, + "panels": [ + + ], + "title": "Documents", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(elasticsearch_indices_docs{cluster=\u007e\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Documents", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Documents indexed", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(elasticsearch_indices_store_size_bytes{cluster=\u007e\"$cluster\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Index Size", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Index Size", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_indexing_index_total{cluster=\u007e\"$cluster\"}[1h])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Documents Indexed Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": "Documents/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_indices_search_fetch_total{cluster=\u007e\"$cluster\"}[1h])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Query Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": "Queris/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 27 + }, + "height": "", + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(elasticsearch_thread_pool_queue_count{cluster=\u007e\"$cluster\", type!=\"management\"}) by (type)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Type: {{type}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Queue Count", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 68, + "panels": [ + + ], + "title": "System", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$server", + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 0, + "y": 36 + }, + "height": "", + "id": 12, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_memory_MemTotal{cluster=\u007e\"$cluster\"})", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Total Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$server", + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 8, + "y": 36 + }, + "height": "", + "id": 13, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_memory_MemFree{cluster=\u007e\"$cluster\"})", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Total Memory Free", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$server", + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 16, + "y": 36 + }, + "height": "", + "id": 14, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(node_memory_MemAvailable{cluster=\u007e\"$cluster\"})", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Total Memory Available", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(elasticsearch_thread_pool_active_count{cluster=\u007e\"$cluster\", type!=\"management\"}) by (type)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Type: {{ type }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Thread Pools", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(elasticsearch_thread_pool_rejected_count{cluster=\u007e\"$cluster\", type!=\"management\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} {{ type }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Thread pool rejections", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 57, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "bucketAggs": [ + { + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "dsType": "elasticsearch", + "expr": "avg(irate(node_cpu{cluster=\u007e\"$cluster\"}[10s])) by(mode) *100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ mode }}", + "metric": "elasticsearch_breakers_tripped", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "refId": "A", + "step": 240, + "timeField": "failure_tstamp" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Avg. CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg_over_time(elasticsearch_jvm_memory_used_bytes{area=\"heap\",cluster=\u007e\"$cluster\"}[15m]) / elasticsearch_jvm_memory_max_bytes{area=\"heap\",cluster=\u007e\"$cluster\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Avg Heap in 15min", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(elasticsearch_transport_rx_packets_total{cluster=\u007e\"$cluster\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "RX", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(rate(elasticsearch_transport_tx_packets_total{cluster=\u007e\"$cluster\"}[5m])) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "TX", + "refId": "B", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "RX/TX Rate 5m", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$server", + "editable": true, + "error": false, + "fill": 1, + "grid": { + + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=\u007e\"$cluster\"}[1m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ name }} {{ gc }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "GC seconds", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "infra", + "b2c", + "elastic" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "label": "Server", + "name": "server", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": { + + }, + "datasource": "$server", + "definition": "", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "cluster", + "options": [ + + ], + "query": "label_values(elasticsearch_cluster_health_status,cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "ElasticSearch Example", + "uid": "n_nxrE_mk", + "version": 1 + } + kind: ConfigMap + metadata: + name: grafana-dashboard-elasticsearch-dashboard + namespace: monitoring - apiVersion: v1 data: k8s-cluster-rsrc-use.json: |- @@ -4310,7 +7557,7 @@ items: }, "yaxes": [ { - "format": "decbytes", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -4398,7 +7645,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Requests", @@ -4416,7 +7663,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Requests %", @@ -4452,7 +7699,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Limits %", @@ -5104,7 +8351,7 @@ items: ], "timeFrom": null, "timeShift": null, - "title": "Memory Usage", + "title": "Memory Usage (w/o cache)", "tooltip": { "shared": true, "sort": 0, @@ -5122,7 +8369,7 @@ items: }, "yaxes": [ { - "format": "decbytes", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -5210,7 +8457,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Requests", @@ -5228,7 +8475,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Requests %", @@ -5264,7 +8511,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Limits %", @@ -5284,6 +8531,60 @@ items: "type": "number", "unit": "percentunit" }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, { "alias": "Pod", "colorMode": null, @@ -5363,6 +8664,33 @@ items: "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_rss{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_cache{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_swap{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ @@ -5930,10 +9258,26 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "expr": "sum(container_memory_rss{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{container_name}}", + "legendFormat": "{{container_name}} (RSS)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_cache{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_name}} (Cache)", + "legendLink": null, + "step": 10 + }, + { + "expr": "sum(container_memory_swap{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{container_name}} (Swap)", "legendLink": null, "step": 10 } @@ -5961,7 +9305,7 @@ items: }, "yaxes": [ { - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -6049,7 +9393,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Requests", @@ -6067,7 +9411,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Requests %", @@ -6103,7 +9447,7 @@ items: ], "type": "number", - "unit": "decbytes" + "unit": "bytes" }, { "alias": "Memory Limits %", @@ -6123,6 +9467,60 @@ items: "type": "number", "unit": "percentunit" }, + { + "alias": "Memory Usage (RSS)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #F", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Cache)", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #G", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Memory Usage (Swap", + "colorMode": null, + "colors": [ + + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #H", + "thresholds": [ + + ], + "type": "number", + "unit": "bytes" + }, { "alias": "Container", "colorMode": null, @@ -6202,6 +9600,33 @@ items: "legendFormat": "", "refId": "E", "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_rss{namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_cache{namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "expr": "sum(label_replace(container_memory_swap{namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 } ], "thresholds": [ diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index e4e6e77..191acba 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -42,9 +42,15 @@ spec: - mountPath: /etc/grafana/provisioning/dashboards name: grafana-dashboards readOnly: false + - mountPath: /grafana-dashboard-definitions/0/apc-ups-dashboard + name: grafana-dashboard-apc-ups-dashboard + readOnly: false - mountPath: /grafana-dashboard-definitions/0/coredns-dashboard name: grafana-dashboard-coredns-dashboard readOnly: false + - mountPath: /grafana-dashboard-definitions/0/elasticsearch-dashboard + name: grafana-dashboard-elasticsearch-dashboard + readOnly: false - mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use name: grafana-dashboard-k8s-cluster-rsrc-use readOnly: false @@ -100,9 +106,15 @@ spec: - configMap: name: grafana-dashboards name: grafana-dashboards + - configMap: + name: grafana-dashboard-apc-ups-dashboard + name: grafana-dashboard-apc-ups-dashboard - configMap: name: grafana-dashboard-coredns-dashboard name: grafana-dashboard-coredns-dashboard + - configMap: + name: grafana-dashboard-elasticsearch-dashboard + name: grafana-dashboard-elasticsearch-dashboard - configMap: name: grafana-dashboard-k8s-cluster-rsrc-use name: grafana-dashboard-k8s-cluster-rsrc-use diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml index 213714d..3d2a434 100644 --- a/manifests/node-exporter-daemonset.yaml +++ b/manifests/node-exporter-daemonset.yaml @@ -74,8 +74,10 @@ spec: runAsUser: 65534 serviceAccountName: node-exporter tolerations: + - effect: NoExecute + operator: Exists - effect: NoSchedule - key: node-role.kubernetes.io/master + operator: Exists volumes: - hostPath: path: /proc diff --git a/manifests/prometheus-adapter-apiService.yaml b/manifests/prometheus-adapter-apiService.yaml index 95d5c32..a215efe 100644 --- a/manifests/prometheus-adapter-apiService.yaml +++ b/manifests/prometheus-adapter-apiService.yaml @@ -1,4 +1,4 @@ -apiVersion: apiregistration.k8s.io/v1beta1 +apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: name: v1beta1.metrics.k8s.io diff --git a/manifests/prometheus-adapter-clusterRoleBinding.yaml b/manifests/prometheus-adapter-clusterRoleBinding.yaml index 29fa917..7e8f3da 100644 --- a/manifests/prometheus-adapter-clusterRoleBinding.yaml +++ b/manifests/prometheus-adapter-clusterRoleBinding.yaml @@ -2,7 +2,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: prometheus-adapter - namespace: monitoring roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 19432b5..46c849a 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -319,6 +319,32 @@ spec: - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu)) record: cluster:node_cpu:ratio + - name: elasticsearch-k8s-rules + rules: + - expr: 100 * (elasticsearch_filesystem_data_size_bytes - elasticsearch_filesystem_data_free_bytes) + / elasticsearch_filesystem_data_size_bytes + record: elasticsearch_filesystem_data_used_percent + - expr: 100 - elasticsearch_filesystem_data_used_percent + record: elasticsearch_filesystem_data_free_percent + - alert: ElasticsearchTooFewNodesRunning + annotations: + message: There are only {{$value}} < 3 ElasticSearch nodes running + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-elasticsearchtoofewnodesrunning + summary: ElasticSearch running on less than 3 nodes + expr: elasticsearch_cluster_health_number_of_nodes < 3 + for: 5m + labels: + severity: critical + - alert: ElasticsearchHeapTooHigh + annotations: + message: The heap usage is over 90% for 15m + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-elasticsearchheaptoohigh + summary: ElasticSearch node {{$labels.node}} heap usage is high + expr: elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} + > 0.9 + for: 15m + labels: + severity: critical - name: kubernetes-absent rules: - alert: AlertmanagerDown diff --git a/manifests/ups-exporter-endpoints.yaml b/manifests/ups-exporter-endpoints.yaml new file mode 100644 index 0000000..ad5b16c --- /dev/null +++ b/manifests/ups-exporter-endpoints.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Endpoints +metadata: + labels: + k8s-app: ups-exporter + name: ups-exporter + namespace: monitoring +subsets: +- addresses: + - ip: 192.168.1.62 + ports: + - name: metrics + port: 9099 + protocol: TCP diff --git a/manifests/ups-exporter-service.yaml b/manifests/ups-exporter-service.yaml new file mode 100644 index 0000000..d62a6b5 --- /dev/null +++ b/manifests/ups-exporter-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + k8s-app: ups-exporter + name: ups-exporter + namespace: monitoring +spec: + clusterIP: None + ports: + - name: metrics + port: 9099 + targetPort: 9099 + selector: null diff --git a/manifests/ups-exporter-serviceMonitor.yaml b/manifests/ups-exporter-serviceMonitor.yaml new file mode 100644 index 0000000..42c2a1b --- /dev/null +++ b/manifests/ups-exporter-serviceMonitor.yaml @@ -0,0 +1,16 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: ups-exporter + name: ups-exporter + namespace: monitoring +spec: + endpoints: + - interval: 30s + port: metrics + scheme: http + jobLabel: k8s-app + selector: + matchLabels: + k8s-app: ups-exporter