From a5c42e5c08b2604015f027c9a6d811c09b50c05b Mon Sep 17 00:00:00 2001 From: CarlosEDP Date: Tue, 6 Mar 2018 10:42:07 -0500 Subject: [PATCH] Added custom dashboards --- ...toring (via Prometheus)-1520350356452.json | 1630 ++++++ ...Exporter Server Metrics-1520350339243.json | 1609 ++++++ .../Prometheus2.0-1520350511205.json | 5114 +++++++++++++++++ ...aefik Realtime Metrics-1520350498858.json | 1146 ++++ 4 files changed, 9499 insertions(+) create mode 100644 grafana-dashboards/Kubernetes cluster monitoring (via Prometheus)-1520350356452.json create mode 100644 grafana-dashboards/Node Exporter Server Metrics-1520350339243.json create mode 100644 grafana-dashboards/Prometheus2.0-1520350511205.json create mode 100644 grafana-dashboards/Traefik Realtime Metrics-1520350498858.json diff --git a/grafana-dashboards/Kubernetes cluster monitoring (via Prometheus)-1520350356452.json b/grafana-dashboards/Kubernetes cluster monitoring (via Prometheus)-1520350356452.json new file mode 100644 index 0000000..f7d204d --- /dev/null +++ b/grafana-dashboards/Kubernetes cluster monitoring (via Prometheus)-1520350356452.json @@ -0,0 +1,1630 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "alertlist", + "name": "Alert List", + "version": "5.0.0" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", + "editable": true, + "gnetId": 162, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_memory_MemTotal) - sum(node_memory_MemFree+node_memory_Buffers+node_memory_Cached) ) / sum(node_memory_MemTotal) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster memory usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(0, 0, 0, 0)", + "rgb(210, 1, 1)", + "#0a50a1" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(rpi_up == 1) / sum(rpi_up)) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "101", + "title": "Up Nodes", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 6, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(100 - (avg by (instance) (irate(node_cpu{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster CPU usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "fontSize": "110%", + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 25, + "links": [], + "pageSize": null, + "scroll": false, + "showHeader": true, + "sort": { + "col": 2, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "Uptime", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/endpoint|job|namespace|pod|service/", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "time() - node_boot_time", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Node Uptime", + "transform": "table", + "transparent": true, + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 15, + "panels": [], + "title": "Nodes", + "type": "row" + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 3500000000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Memory Usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal - (node_memory_MemFree+node_memory_Buffers+node_memory_Cached )", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3500000000 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 90 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (instance) (irate(node_cpu{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 90 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 31, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ALERTS{alertstate=\"firing\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + }, + { + "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}", + "format": "table", + "hide": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "title": "Active Alerts", + "transform": "table", + "type": "table" + }, + { + "gridPos": { + "h": 9, + "w": 5, + "x": 12, + "y": 17 + }, + "id": 27, + "limit": 10, + "links": [], + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 1, + "stateFilter": [], + "title": "Alarms", + "type": "alertlist" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 17 + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_filesystem_size{device=~\"/dev/.*\"}) - sum(node_filesystem_free{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size{device=~\"/dev/.*\"}) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster Filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Node Down", + "noDataState": "alerting", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 4, + "w": 7, + "x": 17, + "y": 22 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rpi_up", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 1 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Up Nodes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Temperature alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rpi_cpu_temperature_celsius", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Temperature", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "celsius", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 17, + "panels": [], + "title": "Pods", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 270, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1m] ) )", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ container_name}}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pod CPU usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 2, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (container_name, image))", + "format": "time_series", + "hide": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (container_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ container_name }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pod memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 150, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])) by (name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Sent Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 150, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[5m])) by (name)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Received Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 8, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Receive Traffic", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Transmit Traffic", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pod Network i/o", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "custom" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kubernetes cluster monitoring (via Prometheus)", + "uid": "82pBZCmRk", + "version": 22 +} \ No newline at end of file diff --git a/grafana-dashboards/Node Exporter Server Metrics-1520350339243.json b/grafana-dashboards/Node Exporter Server Metrics-1520350339243.json new file mode 100644 index 0000000..afcbba7 --- /dev/null +++ b/grafana-dashboards/Node Exporter Server Metrics-1520350339243.json @@ -0,0 +1,1609 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard to view multiple servers", + "editable": true, + "gnetId": 405, + "graphTooltip": 0, + "id": null, + "iteration": 1520350315682, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 56, + "panels": [], + "title": "Main", + "type": "row" + }, + { + "content": "", + "editable": true, + "error": false, + "gridPos": { + "h": 1, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 11, + "links": [], + "minSpan": 4, + "mode": "html", + "repeat": "node", + "repeatDirection": "h", + "style": {}, + "title": "$node", + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": "node", + "repeatDirection": "h", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(node_cpu{instance=~\"$node\", mode=\"system\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400, + "target": "" + } + ], + "thresholds": "", + "title": "CPU Cores", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 10, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 5 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode)(irate(node_cpu{mode=\"system\",instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='user',instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "user", + "refId": "B", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='nice',instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "nice", + "refId": "C", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='iowait',instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "iowait", + "refId": "E", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='steal',instance=~'$node'}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "steal", + "refId": "H", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='idle',instance=~'$node'}[5m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "idle", + "refId": "D", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='irq',instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "irq", + "refId": "F", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='softirq',instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "softirq", + "refId": "G", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='guest',instance=~'$node'}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "guest", + "refId": "I", + "step": 1200 + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(216, 200, 27, 0.27)", + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "%", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Slab": "#E5A8E2", + "Swap": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/Apps|Buffers|Cached|Free|Slab|SwapCached|PageTables|VmallocUsed/", + "fill": 5, + "stack": true + }, + { + "alias": "Swap", + "fill": 5, + "stack": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "( node_memory_MemTotal{instance=~'$node'} - node_memory_MemFree{instance=~'$node'} - node_memory_Buffers{instance=~'$node'} - node_memory_Cached{instance=~'$node'} - node_memory_SwapCached{instance=~'$node'} - node_memory_Slab{instance=~'$node'} - node_memory_PageTables{instance=~'$node'} - node_memory_VmallocUsed{instance=~'$node'} )", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Apps", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "node_memory_Buffers{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Buffers", + "refId": "B", + "step": 1200 + }, + { + "expr": "node_memory_Cached{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Cached", + "refId": "D", + "step": 1200 + }, + { + "expr": "node_memory_MemFree{instance=~'$node'}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Free", + "refId": "E", + "step": 1200 + }, + { + "expr": "node_memory_Slab{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Slab", + "refId": "F", + "step": 1200 + }, + { + "expr": "node_memory_SwapCached{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "SwapCached", + "refId": "G", + "step": 1200 + }, + { + "expr": "node_memory_PageTables{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "PageTables", + "refId": "H", + "step": 1200 + }, + { + "expr": "node_memory_VmallocUsed{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "VmallocUsed", + "metric": "", + "refId": "I", + "step": 1200 + }, + { + "expr": "(node_memory_SwapTotal{instance=~'$node'} - node_memory_SwapFree{instance=~'$node'})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Swap", + "metric": "", + "refId": "C", + "step": 1200 + }, + { + "expr": "node_memory_Committed_AS{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Committed", + "metric": "", + "refId": "J", + "step": 1200 + }, + { + "expr": "node_memory_Mapped{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Mapped", + "refId": "K", + "step": 1200 + }, + { + "expr": "node_memory_Active{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Active", + "metric": "", + "refId": "L", + "step": 1200 + }, + { + "expr": "node_memory_Inactive{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Inactive", + "metric": "", + "refId": "M", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "GB", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 19 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$node\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "load", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 26 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100.0 - 100 * (node_filesystem_avail{instance=~'$node',device !~'tmpfs',device!~'by-uuid'} / node_filesystem_size{instance=~'$node',device !~'tmpfs',device!~'by-uuid'})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Space Used", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 33 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_ms{instance=~\"$node\"}[5m])/10", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": 100, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 40 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_read$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed{instance=~'$node'}[5m])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_read", + "metric": "", + "refId": "A", + "step": 2400, + "target": "" + }, + { + "expr": "irate(node_disk_writes_completed{instance=~'$node'}[5m])", + "intervalFactor": 2, + "legendFormat": "{{device}}_write", + "metric": "", + "refId": "B", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk IOs per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IO/second read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 47 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_sectors_read{instance=~'$node'}[5m]) * 512", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_read", + "refId": "B", + "step": 2400 + }, + { + "expr": "irate(node_disk_sectors_written{instance=~'$node'}[5m]) * 512", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_write", + "metric": "", + "refId": "A", + "step": 2400, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bytes/second read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 54 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_context_switches{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "context switches", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Context Switches", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 61 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_in/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes{instance=~'$node'}[5m])*8", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}_in", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "irate(node_network_transmit_bytes{instance=~'$node'}[5m])*8", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}_out", + "refId": "B", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bits", + "label": "bits in (-) / bits out (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 68 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", + "intervalFactor": 2, + "legendFormat": "established", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Netstat", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 75 + }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "Udp_NoPorts", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_InDatagrams", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_InErrors", + "refId": "B", + "step": 1200 + }, + { + "expr": "irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Udp_OutDatagrams", + "refId": "C", + "step": 1200 + }, + { + "expr": "irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_NoPorts", + "refId": "D", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "UDP Stats", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 82 + }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_nf_conntrack_entries_limit{instance=~\"$node\"} - node_nf_conntrack_entries{instance=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "free", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Conntrack", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 89 + }, + "id": 88, + "panels": [], + "title": "Row title", + "type": "row" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "custom" + ], + "templating": { + "list": [ + { + "allFormat": "glob", + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "", + "multi": true, + "multiFormat": "regex values", + "name": "node", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "", + "multi": true, + "multiFormat": "regex values", + "name": "node_no_port", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "/^(.*)\\:.*/", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Node Exporter Server Metrics", + "uid": "GxJ3JT6zk", + "version": 4 +} \ No newline at end of file diff --git a/grafana-dashboards/Prometheus2.0-1520350511205.json b/grafana-dashboards/Prometheus2.0-1520350511205.json new file mode 100644 index 0000000..de29bb0 --- /dev/null +++ b/grafana-dashboards/Prometheus2.0-1520350511205.json @@ -0,0 +1,5114 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + }, + { + "name": "VAR_SCRAPE_INTERVAL", + "type": "constant", + "label": "Scrape interval seconds", + "value": "60", + "description": "" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)", + "hide": false, + "iconColor": "rgb(250, 44, 18)", + "limit": 100, + "name": "downage", + "showIn": 0, + "step": "30s", + "tagKeys": "instance", + "textFormat": "prometheus down", + "titleFormat": "Downage", + "type": "alert" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)", + "hide": false, + "iconColor": "#fceaca", + "limit": 100, + "name": "Reload", + "showIn": 0, + "step": "5m", + "tagKeys": "instance", + "tags": [], + "titleFormat": "Reload", + "type": "tags" + } + ] + }, + "description": "Dashboard for monitoring of Prometheus v2.x.x", + "editable": true, + "gnetId": 3681, + "graphTooltip": 1, + "id": null, + "iteration": 1520350506982, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Dashboard's Github ", + "tooltip": "Github repo of this dashboard", + "type": "link", + "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard" + }, + { + "icon": "doc", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "tooltip": "", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 55, + "panels": [], + "repeat": null, + "title": "Header instance info", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#bf1b00" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "format": "s", + "gauge": { + "maxValue": 1000000, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 41, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#bf1b00" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "short", + "gauge": { + "maxValue": 1000000, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 4, + "y": 1 + }, + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "500000,800000,1000000", + "title": "Total count of time series", + "type": "singlestat", + "valueFontSize": "150%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 48, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "version", + "targets": [ + { + "expr": "prometheus_build_info{instance=\"$instance\"}", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Version", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 15, + "y": 1 + }, + "id": 49, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual head block length", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "gridPos": { + "h": 5, + "w": 2, + "x": 19, + "y": 1 + }, + "height": "", + "id": 50, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#e6522c", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 1, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 52, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "10,20", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 56, + "panels": [], + "repeat": null, + "title": "Main info", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 15, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "max duration for {{slice}}", + "metric": "prometheus_local_storage_rushed_mode", + "refId": "A", + "step": 900 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Query elapsed time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created on {{ instance }}", + "metric": "prometheus_local_storage_maintain_series_duration_seconds_count", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "removed on {{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head series created/deleted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "exceeded_sample_limit on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "duplicate_timestamp on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "B", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "out_of_bounds on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "C", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "out_of_order on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "D", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "rule_evaluation_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "G", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tsdb_compactions_failed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "K", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tsdb_reloads_failures on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "L", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "head_series_not_found on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "N", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evaluator_iterations_missed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "O", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evaluator_iterations_skipped on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "P", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Prometheus errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 57, + "panels": [], + "repeat": null, + "title": "Scrape & rule duration", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": false, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval", + "format": "time_series", + "interval": "2m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 300 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Scrape delay (counts with 1m scrape interval)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Queue length", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Queue length", + "metric": "prometheus_local_storage_indexing_queue_length", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Rule evaulation duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 58, + "panels": [], + "repeat": null, + "title": "Requests & queries", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 23 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ handler }} on {{ instance }}", + "metric": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 23 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ handler }} on {{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request duration per handler", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 23 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ handler }} in {{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Request size by handler", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Allocated bytes": "#F9BA8F", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max count collector": "#bf1b00", + "Max count harvester": "#bf1b00", + "Max to persist": "#3F6833", + "RSS": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 23 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Max.*/", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current count ", + "metric": "last", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Max count", + "metric": "last", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cont of concurent queries", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 59, + "panels": [], + "repeat": null, + "title": "Alerting", + "type": "row" + }, + { + "aliasColors": { + "Alert queue capacity on o collector": "#bf1b00", + "Alert queue capacity on o harvester": "#bf1b00", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 31 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*capacity.*/", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Alert queue capacity ", + "metric": "prometheus_local_storage_checkpoint_last_size_bytes", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Alert queue size on ", + "metric": "prometheus_local_storage_checkpoint_last_size_bytes", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Alert queue size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 31 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Checkpoint chunks written/s", + "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Count of discovered alertmanagers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 31 + }, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "notifications_dropped on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "F", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "rule_evaluation_failures on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Alerting errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 60, + "panels": [], + "repeat": null, + "title": "Service discovery", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "id": 43, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Consul target sync count", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Consul SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 39 + }, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Marathon target sync count", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Marathon SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 39 + }, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Count of target synces", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Kubernetes SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 39 + }, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "exceeded_sample_limit on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_file_read_error on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "E", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_consul_rpc_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "H", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_marathon_refresh_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "I", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_openstack_refresh_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "J", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Service discovery errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 61, + "panels": [], + "repeat": null, + "title": "TSDB stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 47 + }, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reloaded block from disk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 47 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Loaded data blocks", + "metric": "prometheus_local_storage_memory_chunkdescs", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Loaded data blocks", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 47 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Time series count", + "metric": "prometheus_local_storage_memory_series", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Time series total count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 47 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "samples/s {{instance}}", + "metric": "prometheus_local_storage_ingested_samples_total", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Samples Appended per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 62, + "panels": [], + "repeat": null, + "title": "Head block stats", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "To persist": "#9AC48A" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 55 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Max.*/", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Head chunk count", + "metric": "prometheus_local_storage_memory_chunks", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head chunks count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 55 + }, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Length of head block", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 55 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created on {{ instance }}", + "refId": "B" + }, + { + "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "deleted on {{ instance }}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Head Chunks Created/Deleted per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 63, + "panels": [], + "repeat": null, + "title": "Data maintenance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 63 + }, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compaction duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 63 + }, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} on {{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Go Garbage collection duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 63 + }, + "id": 37, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} on {{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "WAL truncate duration seconds", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 63 + }, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} {{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "WAL fsync duration seconds", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 64, + "panels": [], + "repeat": null, + "title": "RAM&CPU", + "type": "row" + }, + { + "aliasColors": { + "Allocated bytes": "#7EB26D", + "Allocated bytes - 1m max": "#BF1B00", + "Allocated bytes - 1m min": "#BF1B00", + "Allocated bytes - 5m max": "#BF1B00", + "Allocated bytes - 5m min": "#BF1B00", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "RSS": "#447EBC" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": null, + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 71 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/-/", + "fill": 0 + }, + { + "alias": "collector heap size", + "color": "#E0752D", + "fill": 0, + "linewidth": 2 + }, + { + "alias": "collector kubernetes memory limit", + "color": "#BF1B00", + "fill": 0, + "linewidth": 3 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Total resident memory - {{instance}}", + "metric": "process_resident_memory_bytes", + "refId": "B", + "step": 1800 + }, + { + "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Total llocated bytes - {{instance}}", + "metric": "go_memstats_alloc_bytes", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Allocated bytes": "#F9BA8F", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "RSS": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 71 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Allocated Bytes/s", + "metric": "go_memstats_alloc_bytes", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Allocations per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 71 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CPU/s", + "metric": "prometheus_local_storage_ingested_samples_total", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "avg" + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 78 + }, + "id": 65, + "panels": [], + "repeat": null, + "title": "Heapster description", + "type": "row" + }, + { + "content": "Two rows bellow can serve as example if you are running **Prometheus** in **Kubernetes** and uses **Heapster** with **InfluxDB**.\n\nThe schema and queries are very hard to generalize so you will have to tweak them but I leave them here for inspiration.", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 79 + }, + "id": 51, + "links": [], + "mode": "markdown", + "title": "Heapster rows", + "type": "text" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 82 + }, + "id": 66, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$influx_datasource", + "decimals": 2, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 91 + }, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "cpu/limit.mean ", + "color": "#C15C17", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\")/1000 as \"CPU LIMIT\" FROM \"cpu/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage/s", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$influx_datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 91 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "cpu/limit.mean ", + "color": "#C15C17", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"CPU LIMIT\" FROM \"memory/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "Heapster RAM&CPU", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 83 + }, + "id": 67, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$influx_datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 92 + }, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/errors/", + "color": "#C15C17", + "pointradius": 3, + "points": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"rx_rate.mean \" FROM \"network/rx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"rx_rate.mean\" FROM \"network/rx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean \" FROM \"network/tx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean\" FROM \"network/tx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "E", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "F", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "G", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "rawQuery": true, + "refId": "H", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network rx[IN] / tx[OUT] in bytes/s", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$influx_datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 92 + }, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "cpu/limit.mean", + "color": "#C15C17", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean \" FROM \"filesystem/limit\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean\" FROM \"filesystem/limit\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$influx_datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 92 + }, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "cpu/limit.mean ", + "color": "#C15C17", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"FREE_INODES.mean \" FROM \"filesystem/inodes_free\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"FREE_INODES.mean\" FROM \"filesystem/inodes_free\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"USED_INODES.mean \" FROM \"filesystem/inodes\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "dsType": "influxdb", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") as \"USED_INODES.mean\" FROM \"filesystem/inodes\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "rawQuery": true, + "refId": "D", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of free INODES", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "Heapster host stats", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 84 + }, + "id": 68, + "panels": [], + "repeat": null, + "title": "Contrac errors", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 85 + }, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "M", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Net errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "5m", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "custom" + ], + "templating": { + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "2m", + "current": { + "text": "auto", + "value": "$__auto_interval_aggregation_interval" + }, + "hide": 0, + "label": "aggregation intarval", + "name": "aggregation_interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_aggregation_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + }, + { + "allValue": null, + "current": {}, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(prometheus_build_info, instance)", + "refresh": 2, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "value": "${VAR_SCRAPE_INTERVAL}", + "text": "${VAR_SCRAPE_INTERVAL}" + }, + "hide": 0, + "label": "Scrape interval seconds", + "name": "scrape_interval", + "options": [ + { + "value": "${VAR_SCRAPE_INTERVAL}", + "text": "${VAR_SCRAPE_INTERVAL}" + } + ], + "query": "${VAR_SCRAPE_INTERVAL}", + "type": "constant" + }, + { + "current": { + "text": "prometheus", + "value": "prometheus" + }, + "hide": 0, + "label": "Prometheus datasource", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "No data sources found", + "value": "" + }, + "hide": 0, + "label": "InfluxDB datasource", + "name": "influx_datasource", + "options": [], + "query": "influxdb", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Prometheus2.0", + "uid": "XmsJC9mRz", + "version": 4 +} \ No newline at end of file diff --git a/grafana-dashboards/Traefik Realtime Metrics-1520350498858.json b/grafana-dashboards/Traefik Realtime Metrics-1520350498858.json new file mode 100644 index 0000000..0a0d9d5 --- /dev/null +++ b/grafana-dashboards/Traefik Realtime Metrics-1520350498858.json @@ -0,0 +1,1146 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.2.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Visualize Traefik Metrics", + "editable": true, + "gnetId": 2870, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(traefik_request_duration_seconds_sum) / sum(traefik_requests_total) * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "300,600", + "title": "Average Response Time (ms)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(time() - process_start_time_seconds{job=\"traefik-ingress-lb\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(26, 206, 22, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(traefik_requests_total{code=\"404\",method=\"GET\",exported_service=\"http\"}[5m])) * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "traefik_requests_total", + "refId": "A", + "step": 60 + } + ], + "thresholds": "0,1", + "title": "404 Error Count last 5 Minutes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "max" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_requests_total{exported_service=\"http\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{http}}", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "expr": "sum(traefik_requests_total{exported_service=\"https\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{https}}", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "Count", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Average response time alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_request_duration_seconds_sum) / sum(traefik_requests_total) * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average response time (ms)", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1000 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Average response time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 2 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_requests_total[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{requests}}", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests in last 5 minutes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 11, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traefik_requests_total{exported_service=~\"http|https\",code=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{service}} {{method}} {{code}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Successful Status Code Count (5min)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 11, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traefik_requests_total{exported_service=~\"http|https\",code!=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{service}} {{method}} {{code}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bad Status Code Count (5m)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 11, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "traefik_requests_total{exported_service!~\"http\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{service}} {{method}} {{code}}", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests by Request Code", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traefik_requests_total{exported_service=~\"http|https\",code=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status code 200 over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traefik_requests_total{exported_service=~\"http|https\",code!=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ method }} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Others status code over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS}", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 17, + "interval": null, + "legend": { + "percentage": true, + "show": true, + "values": false + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "sum(rate(traefik_requests_total{exported_service !~ \"http|https\"}[5m])) by (exported_service) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ exported_service}}", + "refId": "A" + } + ], + "title": "Requests by service", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS}", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 19, + "interval": null, + "legend": { + "percentage": true, + "show": true, + "values": false + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "sum(rate(traefik_requests_total{exported_service =~ \"http|https\"}[5m])) by (exported_service) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ exported_service}}", + "refId": "A" + } + ], + "title": "Requests by protocol", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "custom" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Traefik Realtime Metrics", + "uid": "000000010", + "version": 6 +} \ No newline at end of file