diff --git a/grafana-dashboards/prometheus-dashboard.json b/grafana-dashboards/prometheus-dashboard.json index c73f4d6..9e6f2d6 100644 --- a/grafana-dashboards/prometheus-dashboard.json +++ b/grafana-dashboards/prometheus-dashboard.json @@ -1,58 +1,126 @@ { "annotations": { - "list": [{ - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - }] + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)", + "hide": false, + "iconColor": "rgb(250, 44, 18)", + "limit": 100, + "name": "downage", + "showIn": 0, + "step": "30s", + "tagKeys": "instance", + "textFormat": "prometheus down", + "titleFormat": "Downage", + "type": "alert" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)", + "hide": false, + "iconColor": "#fceaca", + "limit": 100, + "name": "Reload", + "showIn": 0, + "step": "5m", + "tagKeys": "instance", + "tags": [], + "titleFormat": "Reload", + "type": "tags" + } + ] }, - "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", + "description": "Dashboard for monitoring of Prometheus v2.x.x", "editable": true, - "gnetId": 162, + "gnetId": 3681, "graphTooltip": 1, - "links": [], - "panels": [{ + "id": 4, + "iteration": 1596721016726, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Dashboard's Github ", + "tooltip": "Github repo of this dashboard", + "type": "link", + "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard" + }, + { + "icon": "doc", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "tooltip": "", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 55, + "panels": [], + "repeat": null, + "title": "Header instance info", + "type": "row" + }, + { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ - "rgba(50, 172, 45, 0.97)", + "#299c46", "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "#bf1b00" ], - "datasource": "prometheus", - "editable": true, - "error": false, + "datasource": "$datasource", + "decimals": 1, "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, - "format": "percent", + "format": "s", "gauge": { - "maxValue": 100, + "maxValue": 1000000, "minValue": 0, - "show": true, + "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { - "h": 7, - "w": 8, + "h": 5, + "w": 4, "x": 0, - "y": 0 + "y": 1 }, - "id": 4, + "id": 41, "interval": null, - "isNew": true, "links": [], "mappingType": 1, - "mappingTypes": [{ + "mappingTypes": [ + { "name": "value to text", "value": 1 }, @@ -68,11 +136,13 @@ "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", - "rangeMaps": [{ - "from": "null", - "text": "N/A", - "to": "null" - }], + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, @@ -80,42 +150,132 @@ "show": false }, "tableColumn": "", - "targets": [{ - "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - }], - "thresholds": "65, 90", - "title": "Cluster memory usage", + "targets": [ + { + "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Uptime", "type": "singlestat", "valueFontSize": "80%", - "valueMaps": [{ - "op": "=", - "text": "N/A", - "value": "null" - }], + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], "valueName": "current" }, { "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, + "colorBackground": false, + "colorValue": true, "colors": [ - "rgba(0, 0, 0, 0)", - "rgb(210, 1, 1)", - "#890f02" + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#bf1b00" ], - "datasource": "prometheus", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, - "format": "percentunit", + "format": "short", + "gauge": { + "maxValue": 1000000, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 4, + "y": 1 + }, + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}", + "targets": [ + { + "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "500000,800000,1000000", + "title": "Total count of time series", + "type": "singlestat", + "valueFontSize": "150%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", "gauge": { "maxValue": 100, "minValue": 0, @@ -124,16 +284,17 @@ "thresholdMarkers": true }, "gridPos": { - "h": 2, - "w": 8, - "x": 8, - "y": 0 + "h": 5, + "w": 3, + "x": 12, + "y": 1 }, - "id": 23, + "id": 48, "interval": null, "links": [], "mappingType": 1, - "mappingTypes": [{ + "mappingTypes": [ + { "name": "value to text", "value": 1 }, @@ -149,77 +310,79 @@ "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", - "rangeMaps": [{ - "from": "null", - "text": "N/A", - "to": "null" - }], + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, - "tableColumn": "{job=\"kubelet\"}", - "targets": [{ - "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)", - "format": "time_series", - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - }], - "thresholds": "1.1", - "title": "Up Nodes", + "tableColumn": "version", + "targets": [ + { + "expr": "prometheus_build_info{instance=\"$instance\"}", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Version", "type": "singlestat", - "valueFontSize": "120%", - "valueMaps": [{ - "op": "=", - "text": "N/A", - "value": "null" - }], - "valueName": "avg" + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "first" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ - "rgba(50, 172, 45, 0.97)", + "#299c46", "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "#d44a3a" ], - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, + "datasource": "$datasource", + "decimals": 2, "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, - "format": "percent", + "format": "ms", "gauge": { "maxValue": 100, "minValue": 0, - "show": true, + "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 0 + "h": 5, + "w": 4, + "x": 15, + "y": 1 }, - "id": 6, + "id": 49, "interval": null, - "isNew": true, "links": [], "mappingType": 1, - "mappingTypes": [{ + "mappingTypes": [ + { "name": "value to text", "value": 1 }, @@ -235,11 +398,129 @@ "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", - "rangeMaps": [{ - "from": "null", - "text": "N/A", - "to": "null" - }], + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}", + "targets": [ + { + "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual head block length", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 2, + "x": 19, + "y": 1 + }, + "height": "", + "id": 50, + "links": [], + "mode": "html", + "options": { + "content": "", + "mode": "html" + }, + "pluginVersion": "7.1.0", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#e6522c", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "$datasource", + "decimals": 1, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 52, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, @@ -247,206 +528,27 @@ "show": false }, "tableColumn": "", - "targets": [{ - "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - }], - "thresholds": "65, 90", - "title": "Cluster CPU usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [{ - "op": "=", - "text": "N/A", - "value": "null" - }], - "valueName": "current" - }, - { - "columns": [], - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fontSize": "90%", - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 25, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 2, - "desc": false - }, - "styles": [{ - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, + "targets": [ { - "alias": "Uptime", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/endpoint|job|namespace|pod|service/", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "preserveFormat": false, - "sanitize": false, - "thresholds": [], - "type": "string", - "unit": "short" + "expr": "2", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" } ], - "targets": [{ - "expr": "(time() - node_boot_time_seconds)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - }], - "title": "Node Uptime", - "transform": "table", + "thresholds": "10,20", + "title": "", "transparent": true, - "type": "table-old" - }, - { - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [{ - "from": "", - "id": 0, - "operator": "", - "text": "Up", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 1, - "operator": "", - "text": "Down", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "operator": "", - "text": "Down", - "to": "", - "type": 1, - "value": "0.5" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [{ - "color": "rgb(0, 0, 0)", - "value": null - }, - { - "color": "red", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 35, - "options": { - "colorMode": "background", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "vertical", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" } - }, - "pluginVersion": "7.0.3", - "repeat": null, - "targets": [{ - "expr": "sort(avg by (job) (up{job=~\"kube-dns|kubelet|traefik.*|apiserver|kube-controller-manager|kube-scheduler\"}))", - "format": "time_series", - "instant": true, - "interval": "", - "legendFormat": "{{job}}", - "refId": "A" - }], - "timeFrom": null, - "timeShift": null, - "title": "Kubernetes Core Services", - "type": "stat" + ], + "valueName": "avg" }, { "collapsed": false, @@ -455,51 +557,22 @@ "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 6 }, - "id": 15, + "id": 56, "panels": [], - "title": "Nodes", + "repeat": null, + "title": "Main info", "type": "row" }, { - "alert": { - "conditions": [{ - "evaluator": { - "params": [ - 0.85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "B", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - }], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "name": "Memory Usage alert", - "noDataState": "no_data", - "notifications": [] - }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": {} @@ -509,13 +582,1463 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 10 + "y": 7 }, "hiddenSeries": false, - "id": 10, + "id": 15, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "max duration for {{slice}}", + "metric": "prometheus_local_storage_rushed_mode", + "refId": "A", + "step": 900 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Query elapsed time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created on {{ instance }}", + "metric": "prometheus_local_storage_maintain_series_duration_seconds_count", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "removed on {{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Head series created/deleted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "exceeded_sample_limit on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "duplicate_timestamp on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "B", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "out_of_bounds on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "C", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "out_of_order on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "D", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "rule_evaluation_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "G", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tsdb_compactions_failed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "K", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tsdb_reloads_failures on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "L", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "head_series_not_found on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "N", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evaluator_iterations_missed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "O", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evaluator_iterations_skipped on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "P", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Prometheus errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 57, + "panels": [], + "repeat": null, + "title": "Scrape & rule duration", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": false, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval", + "format": "time_series", + "interval": "2m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 300 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scrape delay (counts with 1m scrape interval)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Queue length", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Queue length", + "metric": "prometheus_local_storage_indexing_queue_length", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Rule evaulation duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 58, + "panels": [], + "repeat": null, + "title": "Requests & queries", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ handler }} on {{ instance }}", + "metric": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 23 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ handler }} on {{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request duration per handler", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 23 + }, + "hiddenSeries": false, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ handler }} in {{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request size by handler", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Allocated bytes": "#F9BA8F", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max count collector": "#bf1b00", + "Max count harvester": "#bf1b00", + "Max to persist": "#3F6833", + "RSS": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 23 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Max.*/", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current count ", + "metric": "last", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Max count", + "metric": "last", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cont of concurent queries", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 59, + "panels": [], + "repeat": null, + "title": "Alerting", + "type": "row" + }, + { + "aliasColors": { + "Alert queue capacity on o collector": "#bf1b00", + "Alert queue capacity on o harvester": "#bf1b00", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*capacity.*/", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Alert queue capacity ", + "metric": "prometheus_local_storage_checkpoint_last_size_bytes", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Alert queue size on ", + "metric": "prometheus_local_storage_checkpoint_last_size_bytes", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Alert queue size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 31 + }, + "hiddenSeries": false, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Checkpoint chunks written/s", + "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Count of discovered alertmanagers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 31 + }, + "hiddenSeries": false, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "notifications_dropped on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "F", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "rule_evaluation_failures on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Alerting errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 60, + "panels": [], + "repeat": null, + "title": "Service discovery", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 43, "legend": { "avg": false, "current": false, @@ -529,10 +2052,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "dataLinks": [] - }, "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -540,35 +2061,468 @@ "spaceLength": 10, "stack": false, "steppedLine": false, - "targets": [{ - "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Consul target sync count", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Consul SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 39 + }, + "hiddenSeries": false, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Marathon target sync count", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Marathon SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 39 + }, + "hiddenSeries": false, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Count of target synces", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kubernetes SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 39 + }, + "hiddenSeries": false, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "exceeded_sample_limit on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_file_read_error on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "E", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_consul_rpc_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "H", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_marathon_refresh_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "I", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_openstack_refresh_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "J", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service discovery errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 61, + "panels": [], + "repeat": null, + "title": "TSDB stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ instance }}", "refId": "A" - }, - { - "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "refId": "B" } ], - "thresholds": [{ - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.85 - }], + "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Memory Usage", + "title": "Reloaded block from disk", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -579,8 +2533,9 @@ "show": true, "values": [] }, - "yaxes": [{ - "format": "decbytes", + "yaxes": [ + { + "format": "short", "label": null, "logBase": 1, "max": null, @@ -602,42 +2557,18 @@ } }, { - "alert": { - "conditions": [{ - "evaluator": { - "params": [ - 90 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "15m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - }], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Usage alert", - "noDataState": "no_data", - "notifications": [] + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" }, - "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": {} @@ -647,19 +2578,19 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 10 + "h": 7, + "w": 6, + "x": 6, + "y": 47 }, "hiddenSeries": false, - "id": 11, + "id": 5, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -667,10 +2598,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "dataLinks": [] - }, "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -678,24 +2607,446 @@ "spaceLength": 10, "stack": false, "steppedLine": false, - "targets": [{ - "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", - "format": "time_series", - "intervalFactor": 3, - "legendFormat": "{{instance}}", - "refId": "A" - }], - "thresholds": [{ - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 90 - }], + "targets": [ + { + "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Loaded data blocks", + "metric": "prometheus_local_storage_memory_chunkdescs", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Usage", + "title": "Loaded data blocks", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Time series count", + "metric": "prometheus_local_storage_memory_series", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time series total count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 47 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "samples/s {{instance}}", + "metric": "prometheus_local_storage_ingested_samples_total", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Samples Appended per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 62, + "panels": [], + "repeat": null, + "title": "Head block stats", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "To persist": "#9AC48A" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 55 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Max.*/", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Head chunk count", + "metric": "prometheus_local_storage_memory_chunks", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Head chunks count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 55 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Length of head block", "tooltip": { "shared": true, "sort": 0, @@ -709,9 +3060,9 @@ "show": true, "values": [] }, - "yaxes": [{ - "decimals": null, - "format": "percent", + "yaxes": [ + { + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -733,124 +3084,537 @@ } }, { - "columns": [], - "datasource": "prometheus", + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, - "fontSize": "100%", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 19 + "h": 7, + "w": 8, + "x": 16, + "y": 55 }, - "id": 31, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 15, - "desc": true - }, - "styles": [{ - "$$hashKey": "object:5708", - "alias": "Time", - "align": "auto", - "dateFormat": "HH:mm", - "link": false, - "pattern": "Time", - "type": "date" + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created on {{ instance }}", + "refId": "B" }, { - "$$hashKey": "object:5709", - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/__name__|endpoint|fstype|alertstate|long|mountpoint|namespace|Value|short|pod|service|job/", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "$$hashKey": "object:5710", - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" + "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "deleted on {{ instance }}", + "refId": "C" } ], - "targets": [{ - "expr": "ALERTS{alertstate=\"firing\", alertname!=\"Watchdog\"}", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - }], - "title": "Active Alerts", - "transform": "table", - "type": "table-old" + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Head Chunks Created/Deleted per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "dashboardFilter": "", - "dashboardTags": [], + "collapsed": false, "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 63, + "panels": [], + "repeat": null, + "title": "Data maintenance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, - "folderId": null, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 5, - "x": 12, - "y": 19 + "h": 7, + "w": 6, + "x": 0, + "y": 63 }, - "id": 27, - "limit": 10, + "hiddenSeries": false, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "nameFilter": "", - "onlyAlertsOnDashboard": false, - "show": "current", - "sortOrder": 1, - "stateFilter": [], - "title": "Alarms", - "type": "alertlist" + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Compaction duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 63 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} on {{ instance }}", + "refId": "A" + } ], - "datasource": "prometheus", + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Go Garbage collection duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 63 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} on {{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "WAL truncate duration seconds", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 63 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} {{ instance }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "WAL fsync duration seconds", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 64, + "panels": [], + "repeat": null, + "title": "RAM&CPU", + "type": "row" + }, + { + "aliasColors": { + "Allocated bytes": "#7EB26D", + "Allocated bytes - 1m max": "#BF1B00", + "Allocated bytes - 1m min": "#BF1B00", + "Allocated bytes - 5m max": "#BF1B00", + "Allocated bytes - 5m min": "#BF1B00", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "RSS": "#447EBC" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", "decimals": null, "editable": true, "error": false, @@ -860,128 +3624,16 @@ }, "overrides": [] }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 7, - "x": 17, - "y": 19 - }, - "id": 7, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [{ - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [{ - "from": "null", - "text": "N/A", - "to": "null" - }], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [{ - "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "metric": "", - "refId": "A", - "step": 10 - }], - "thresholds": "65, 90", - "title": "Cluster Filesystem usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [{ - "op": "=", - "text": "N/A", - "value": "null" - }], - "valueName": "current" - }, - { - "alert": { - "alertRuleTags": {}, - "conditions": [{ - "evaluator": { - "params": [ - 1 - ], - "type": "lt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "C", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - }], - "executionErrorState": "alerting", - "for": "2m", - "frequency": "60s", - "handler": 1, - "name": "Node Down", - "noDataState": "alerting", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 4, - "w": 7, - "x": 17, - "y": 24 + "h": 7, + "w": 8, + "x": 0, + "y": 71 }, "hiddenSeries": false, - "id": 29, + "id": 6, "legend": { "avg": false, "current": false, @@ -994,41 +3646,64 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, + "nullPointMode": "null", "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/-/", + "fill": 0 + }, + { + "alias": "collector heap size", + "color": "#E0752D", + "fill": 0, + "linewidth": 2 + }, + { + "alias": "collector kubernetes memory limit", + "color": "#BF1B00", + "fill": 0, + "linewidth": 3 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, - "targets": [{ - "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "C" - }], - "thresholds": [{ - "colorMode": "critical", - "fill": true, - "line": true, - "op": "lt", - "value": 1 - }], + "targets": [ + { + "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Total resident memory - {{instance}}", + "metric": "process_resident_memory_bytes", + "refId": "B", + "step": 1800 + }, + { + "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Total llocated bytes - {{instance}}", + "metric": "go_memstats_alloc_bytes", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Up Nodes", + "title": "Memory", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1039,17 +3714,16 @@ "show": true, "values": [] }, - "yaxes": [{ - "$$hashKey": "object:14563", - "format": "short", + "yaxes": [ + { + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "$$hashKey": "object:14564", "format": "short", "label": null, "logBase": 1, @@ -1064,42 +3738,20 @@ } }, { - "alert": { - "conditions": [{ - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - }], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Temperature alert", - "noDataState": "no_data", - "notifications": [] + "aliasColors": { + "Allocated bytes": "#F9BA8F", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "RSS": "#890F02" }, - "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": {} @@ -1109,19 +3761,19 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 28 + "h": 7, + "w": 8, + "x": 8, + "y": 71 }, "hiddenSeries": false, - "id": 13, + "id": 7, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -1129,10 +3781,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "dataLinks": [] - }, "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -1140,27 +3790,26 @@ "spaceLength": 10, "stack": false, "steppedLine": false, - "targets": [{ - "expr": "rpi_cpu_temperature_celsius", - "format": "time_series", - "intervalFactor": 5, - "legendFormat": "{{instance}}", - "refId": "A" - }], - "thresholds": [{ - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - }], + "targets": [ + { + "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Allocated Bytes/s", + "metric": "go_memstats_alloc_bytes", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Temperature", + "title": "Allocations per second", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -1171,12 +3820,13 @@ "show": true, "values": [] }, - "yaxes": [{ - "format": "celsius", + "yaxes": [ + { + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -1194,609 +3844,396 @@ } }, { - "collapsed": false, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 71 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CPU/s", + "metric": "prometheus_local_storage_ingested_samples_total", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "avg" + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 78 }, - "id": 17, - "panels": [], - "title": "Pods", + "id": 68, + "panels": [ + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 79 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "M", + "step": 1800 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Net errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Contrac errors", "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 3, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 270, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [{ - "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod}}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - }], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pod CPU usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 250, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [{ - "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))", - "format": "time_series", - "hide": true, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - }, - { - "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pod memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 550, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [{ - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 240 - }, - { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sent Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "format": "Bps", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 - }, - "hiddenSeries": false, - "id": 21, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 150, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [{ - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Received Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 56 - }, - "hiddenSeries": false, - "id": 8, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 220, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [{ - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Receive Traffic", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Transmit Traffic", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pod Network i/o", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [{ - "$$hashKey": "object:1163", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1164", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } } ], - "refresh": "10s", - "schemaVersion": 25, + "refresh": "5m", + "schemaVersion": 26, "style": "dark", "tags": [ - "custom", - "overview" + "custom" ], "templating": { - "list": [] + "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "2m", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_aggregation_interval" + }, + "hide": 0, + "label": "aggregation intarval", + "name": "aggregation_interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_aggregation_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "localhost:9090", + "value": "localhost:9090" + }, + "datasource": "$datasource", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(prometheus_build_info, instance)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "text": "60", + "value": "60" + }, + "hide": 0, + "label": "Scrape interval seconds", + "name": "scrape_interval", + "options": [ + { + "text": "60", + "value": "60" + } + ], + "query": "60", + "skipUrlSync": false, + "type": "constant" + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Prometheus datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "No data sources found", + "value": "" + }, + "hide": 0, + "includeAll": false, + "label": "InfluxDB datasource", + "multi": false, + "name": "influx_datasource", + "options": [], + "query": "influxdb", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] }, "time": { "from": "now-3h", @@ -1804,6 +4241,7 @@ }, "timepicker": { "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -1827,6 +4265,7 @@ ] }, "timezone": "browser", - "title": "⭐️ Kubernetes Monitoring Overview", - "version": 1 -} + "title": "Prometheus Monitoring", + "uid": "XmsJC9mRz", + "version": 2 +} \ No newline at end of file diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 714b351..e7a908e 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2015,7 +2015,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / API server", - "uid": "", + "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b", "version": 0 } kind: ConfigMap @@ -3840,7 +3840,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Networking / Cluster", - "uid": "", + "uid": "ff635a025bcfea7bc3dd4f508990a3e9", "version": 0 } kind: ConfigMap @@ -4966,7 +4966,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Controller Manager", - "uid": "", + "uid": "72e0e05bef5099e5f049b05fdc429ed4", "version": 0 } kind: ConfigMap @@ -6445,7 +6445,6 @@ items: }, "timezone": "utc", "title": "CoreDNS", - "uid": "", "version": 1 } kind: ConfigMap @@ -8988,7 +8987,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Compute Resources / Cluster", - "uid": "", + "uid": "efa86fd1d0c121a26444b636a3f509a8", "version": 0 } kind: ConfigMap @@ -11236,7 +11235,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Compute Resources / Namespace (Pods)", - "uid": "", + "uid": "85a562078cdf77779eaa1add43ccec1e", "version": 0 } kind: ConfigMap @@ -12183,7 +12182,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Compute Resources / Node (Pods)", - "uid": "", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c", "version": 0 } kind: ConfigMap @@ -13924,7 +13923,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Compute Resources / Pod", - "uid": "", + "uid": "6581e46e4e5c7ba40a07646395ef7b23", "version": 0 } kind: ConfigMap @@ -15923,7 +15922,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Compute Resources / Workload", - "uid": "", + "uid": "a164a7f0339f99e89cea5cb47e9be617", "version": 0 } kind: ConfigMap @@ -18078,7 +18077,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Compute Resources / Namespace (Workloads)", - "uid": "", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5", "version": 0 } kind: ConfigMap @@ -20577,7 +20576,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Kubelet", - "uid": "", + "uid": "3138fa155d5915769fbded898ac09fd9", "version": 0 } kind: ConfigMap @@ -22666,7 +22665,6 @@ items: }, "timezone": "browser", "title": "Kubernetes cluster monitoring (via Prometheus)", - "uid": "", "version": 1 } kind: ConfigMap @@ -24079,7 +24077,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Networking / Namespace (Pods)", - "uid": "", + "uid": "8b7a8b326d7a6f1f04244066368c67af", "version": 0 } kind: ConfigMap @@ -25760,7 +25758,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Networking / Namespace (Workload)", - "uid": "", + "uid": "bbb2a765a623ae38130206c7d94a160f", "version": 0 } kind: ConfigMap @@ -26708,7 +26706,7 @@ items: }, "timezone": "UTC", "title": "USE Method / Cluster", - "uid": "", + "uid": "3e97d1d02672cdd0861f4c97c64f89b2", "version": 0 } kind: ConfigMap @@ -27683,7 +27681,7 @@ items: }, "timezone": "UTC", "title": "USE Method / Node", - "uid": "", + "uid": "fac67cfbe174d3ef53eb473d73d9212f", "version": 0 } kind: ConfigMap @@ -28661,7 +28659,7 @@ items: }, "timezone": "UTC", "title": "Nodes", - "uid": "", + "uid": "fa49a4706d07a042595b664c87fb33ea", "version": 0 } kind: ConfigMap @@ -29220,7 +29218,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Persistent Volumes", - "uid": "", + "uid": "919b92a8e8041bd567af9edab12c840c", "version": 0 } kind: ConfigMap @@ -30400,7 +30398,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Networking / Pod", - "uid": "", + "uid": "7a18067ce943a40ae25454675c19ff5c", "version": 0 } kind: ConfigMap @@ -30421,29 +30419,100 @@ items: "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)", + "hide": false, + "iconColor": "rgb(250, 44, 18)", + "limit": 100, + "name": "downage", + "showIn": 0, + "step": "30s", + "tagKeys": "instance", + "textFormat": "prometheus down", + "titleFormat": "Downage", + "type": "alert" + }, + { + "datasource": "$datasource", + "enable": true, + "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)", + "hide": false, + "iconColor": "#fceaca", + "limit": 100, + "name": "Reload", + "showIn": 0, + "step": "5m", + "tagKeys": "instance", + "tags": [ + + ], + "titleFormat": "Reload", + "type": "tags" } ] }, - "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", + "description": "Dashboard for monitoring of Prometheus v2.x.x", "editable": true, - "gnetId": 162, + "gnetId": 3681, "graphTooltip": 1, + "id": 4, + "iteration": 1596721016726, "links": [ + { + "icon": "info", + "tags": [ + ], + "targetBlank": true, + "title": "Dashboard's Github ", + "tooltip": "Github repo of this dashboard", + "type": "link", + "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard" + }, + { + "icon": "doc", + "tags": [ + + ], + "targetBlank": true, + "title": "Prometheus Docs", + "tooltip": "", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } ], "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 55, + "panels": [ + + ], + "repeat": null, + "title": "Header instance info", + "type": "row" + }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ - "rgba(50, 172, 45, 0.97)", + "#299c46", "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "#bf1b00" ], - "datasource": "prometheus", - "editable": true, - "error": false, + "datasource": "$datasource", + "decimals": 1, "fieldConfig": { "defaults": { "custom": { @@ -30454,23 +30523,22 @@ items: ] }, - "format": "percent", + "format": "s", "gauge": { - "maxValue": 100, + "maxValue": 1000000, "minValue": 0, - "show": true, + "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { - "h": 7, - "w": 8, + "h": 5, + "w": 4, "x": 0, - "y": 0 + "y": 1 }, - "id": 4, + "id": 41, "interval": null, - "isNew": true, "links": [ ], @@ -30508,16 +30576,15 @@ items: "tableColumn": "", "targets": [ { - "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", + "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})", "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 + "instant": false, + "intervalFactor": 2, + "refId": "A" } ], - "thresholds": "65, 90", - "title": "Cluster memory usage", + "thresholds": "", + "title": "Uptime", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -30531,14 +30598,14 @@ items: }, { "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, + "colorBackground": false, + "colorValue": true, "colors": [ - "rgba(0, 0, 0, 0)", - "rgb(210, 1, 1)", - "#890f02" + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#bf1b00" ], - "datasource": "prometheus", + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -30549,7 +30616,100 @@ items: ] }, - "format": "percentunit", + "format": "short", + "gauge": { + "maxValue": 1000000, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 4, + "y": 1 + }, + "id": 42, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}", + "targets": [ + { + "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "500000,800000,1000000", + "title": "Total count of time series", + "type": "singlestat", + "valueFontSize": "150%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "none", "gauge": { "maxValue": 100, "minValue": 0, @@ -30558,12 +30718,12 @@ items: "thresholdMarkers": true }, "gridPos": { - "h": 2, - "w": 8, - "x": 8, - "y": 0 + "h": 5, + "w": 3, + "x": 12, + "y": 1 }, - "id": 23, + "id": 48, "interval": null, "links": [ @@ -30599,22 +30759,242 @@ items: "lineColor": "rgb(31, 120, 193)", "show": false }, - "tableColumn": "{job=\"kubelet\"}", + "tableColumn": "version", "targets": [ { - "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)", - "format": "time_series", + "expr": "prometheus_build_info{instance=\"$instance\"}", + "format": "table", "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", + "intervalFactor": 2, "refId": "A" } ], - "thresholds": "1.1", - "title": "Up Nodes", + "thresholds": "", + "title": "Version", "type": "singlestat", - "valueFontSize": "120%", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "first" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 2, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 15, + "y": 1 + }, + "id": 49, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}", + "targets": [ + { + "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Actual head block length", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 5, + "w": 2, + "x": 19, + "y": 1 + }, + "height": "", + "id": 50, + "links": [ + + ], + "mode": "html", + "options": { + "content": "", + "mode": "html" + }, + "pluginVersion": "7.1.0", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#e6522c", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "$datasource", + "decimals": 1, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 52, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "10,20", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "200%", "valueMaps": [ { "op": "=", @@ -30625,16 +31005,30 @@ items: "valueName": "avg" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 56, + "panels": [ + ], - "datasource": "prometheus", - "decimals": 0, + "repeat": null, + "title": "Main info", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", "editable": true, "error": false, "fieldConfig": { @@ -30647,287 +31041,434 @@ items: ] }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 }, + "hiddenSeries": false, + "id": 15, + "legend": { + "avg": true, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "max duration for {{slice}}", + "metric": "prometheus_local_storage_rushed_mode", + "refId": "A", + "step": 900 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Query elapsed time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "hiddenSeries": false, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created on {{ instance }}", + "metric": "prometheus_local_storage_maintain_series_duration_seconds_count", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "removed on {{ instance }}", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Head series created/deleted", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 0 - }, - "id": 6, - "interval": null, - "isNew": true, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster CPU usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "columns": [ - - ], - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fontSize": "90%", - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 25, - "links": [ - - ], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 2, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "align": "auto", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "Uptime", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/endpoint|job|namespace|pod|service/", - "thresholds": [ - - ], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "preserveFormat": false, - "sanitize": false, - "thresholds": [ - - ], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(time() - node_boot_time_seconds)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Node Uptime", - "transform": "table", - "transparent": true, - "type": "table-old" - }, - { - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - }, - "mappings": [ - { - "from": "", - "id": 0, - "operator": "", - "text": "Up", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 1, - "operator": "", - "text": "Down", - "to": "", - "type": 1, - "value": "0" - }, - { - "from": "", - "id": 2, - "operator": "", - "text": "Down", - "to": "", - "type": 1, - "value": "0.5" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(0, 0, 0)", - "value": null - }, - { - "color": "red", - "value": 0 - }, - { - "color": "green", - "value": 1 - } - ] - } - }, - "overrides": [ - - ] - }, - "gridPos": { - "h": 2, - "w": 24, - "x": 0, "y": 7 }, - "id": 35, - "options": { - "colorMode": "background", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "vertical", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false }, - "pluginVersion": "7.0.3", - "repeat": null, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sort(avg by (job) (up{job=~\"kube-dns|kubelet|traefik.*|apiserver|kube-controller-manager|kube-scheduler\"}))", + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", "format": "time_series", - "instant": true, "interval": "", - "legendFormat": "{{job}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "exceeded_sample_limit on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "duplicate_timestamp on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "B", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "out_of_bounds on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "C", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "out_of_order on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "D", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "rule_evaluation_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "G", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tsdb_compactions_failed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "K", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "tsdb_reloads_failures on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "L", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "head_series_not_found on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "N", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evaluator_iterations_missed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "O", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "evaluator_iterations_skipped on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "P", + "step": 1800 } + ], + "thresholds": [ + ], "timeFrom": null, + "timeRegions": [ + + ], "timeShift": null, - "title": "Kubernetes Core Services", - "type": "stat" + "title": "Prometheus errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -30936,61 +31477,150 @@ items: "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 14 }, - "id": 15, + "id": 57, "panels": [ ], - "title": "Nodes", + "repeat": null, + "title": "Scrape & rule duration", "type": "row" }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0.85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "B", - "5m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "name": "Memory Usage alert", - "noDataState": "no_data", - "notifications": [ - - ] - }, "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "grid": { + + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "hiddenSeries": false, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": false, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval", + "format": "time_series", + "interval": "2m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 300 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Scrape delay (counts with 1m scrape interval)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { @@ -31004,19 +31634,19 @@ items: "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 12, - "x": 0, - "y": 10 + "x": 12, + "y": 15 }, "hiddenSeries": false, - "id": 10, + "id": 14, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -31026,12 +31656,146 @@ items: ], "nullPointMode": "null", - "options": { - "dataLinks": [ + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Queue length", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Queue length", + "metric": "prometheus_local_storage_indexing_queue_length", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Rule evaulation duration", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 58, + "panels": [ + + ], + "repeat": null, + "title": "Requests & queries", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -31043,38 +31807,147 @@ items: "steppedLine": false, "targets": [ { - "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", + "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" + "legendFormat": "{{ handler }} on {{ instance }}", + "metric": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Request count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 23 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0", "format": "time_series", - "hide": true, - "intervalFactor": 1, + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ handler }} on {{ instance }}", "refId": "B" } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.85 - } + ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Memory Usage", + "title": "Request duration per handler", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -31089,11 +31962,11 @@ items: }, "yaxes": [ { - "format": "decbytes", + "format": "µs", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -31111,50 +31984,18 @@ items: } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 90 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "15m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Usage alert", - "noDataState": "no_data", - "notifications": [ - - ] - }, "aliasColors": { - + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { @@ -31168,13 +32009,674 @@ items: "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 6, "x": 12, - "y": 10 + "y": 23 }, "hiddenSeries": false, - "id": 11, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ handler }} in {{ instance }}", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Request size by handler", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Allocated bytes": "#F9BA8F", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max count collector": "#bf1b00", + "Max count harvester": "#bf1b00", + "Max to persist": "#3F6833", + "RSS": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 23 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Max.*/", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Current count ", + "metric": "last", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Max count", + "metric": "last", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Cont of concurent queries", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 59, + "panels": [ + + ], + "repeat": null, + "title": "Alerting", + "type": "row" + }, + { + "aliasColors": { + "Alert queue capacity on o collector": "#bf1b00", + "Alert queue capacity on o harvester": "#bf1b00", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 31 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*capacity.*/", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Alert queue capacity ", + "metric": "prometheus_local_storage_checkpoint_last_size_bytes", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Alert queue size on ", + "metric": "prometheus_local_storage_checkpoint_last_size_bytes", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Alert queue size", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 31 + }, + "hiddenSeries": false, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Checkpoint chunks written/s", + "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Count of discovered alertmanagers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 31 + }, + "hiddenSeries": false, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "notifications_dropped on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "F", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "rule_evaluation_failures on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Alerting errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 60, + "panels": [ + + ], + "repeat": null, + "title": "Service discovery", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 43, "legend": { "avg": false, "current": false, @@ -31190,12 +32692,8 @@ items: ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -31207,28 +32705,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])", "format": "time_series", - "intervalFactor": 3, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "Consul target sync count", + "refId": "A", + "step": 240 } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 90 - } + ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "CPU Usage", + "title": "Consul SD sync count", "tooltip": { "shared": true, "sort": 0, @@ -31246,8 +32739,7 @@ items: }, "yaxes": [ { - "decimals": null, - "format": "percent", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -31269,10 +32761,13 @@ items: } }, { - "columns": [ + "aliasColors": { - ], - "datasource": "prometheus", + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", "fieldConfig": { "defaults": { "custom": { @@ -31283,93 +32778,509 @@ items: ] }, - "fontSize": "100%", + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 19 + "h": 7, + "w": 6, + "x": 6, + "y": 39 }, - "id": 31, + "hiddenSeries": false, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 15, - "desc": true - }, - "styles": [ - { - "$$hashKey": "object:5708", - "alias": "Time", - "align": "auto", - "dateFormat": "HH:mm", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "$$hashKey": "object:5709", - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/__name__|endpoint|fstype|alertstate|long|mountpoint|namespace|Value|short|pod|service|job/", - "thresholds": [ + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ - ], - "type": "hidden", - "unit": "short" - }, - { - "$$hashKey": "object:5710", - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - - ], - "type": "number", - "unit": "short" - } ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "ALERTS{alertstate=\"firing\", alertname!=\"Watchdog\"}", - "format": "table", - "instant": true, + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Marathon target sync count", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Marathon SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 39 + }, + "hiddenSeries": false, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Count of target synces", + "refId": "A", + "step": 240 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Kubernetes SD sync count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 39 + }, + "hiddenSeries": false, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", "interval": "", - "intervalFactor": 1, - "legendFormat": "", + "intervalFactor": 2, + "legendFormat": "exceeded_sample_limit on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "A", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_file_read_error on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "E", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_consul_rpc_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "H", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_marathon_refresh_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "I", + "step": 1800 + }, + { + "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "sd_openstack_refresh_failure on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "J", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Service discovery errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 61, + "panels": [ + + ], + "repeat": null, + "title": "TSDB stats", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", "refId": "A" } ], - "title": "Active Alerts", - "transform": "table", - "type": "table-old" - }, - { - "dashboardFilter": "", - "dashboardTags": [ + "thresholds": [ ], - "datasource": null, + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Reloaded block from disk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { @@ -31380,38 +33291,1198 @@ items: ] }, - "folderId": null, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 9, - "w": 5, - "x": 12, - "y": 19 + "h": 7, + "w": 6, + "x": 6, + "y": 47 }, - "id": 27, - "limit": 10, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [ ], - "nameFilter": "", - "onlyAlertsOnDashboard": false, - "show": "current", - "sortOrder": 1, - "stateFilter": [ + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], - "title": "Alarms", - "type": "alertlist" + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Loaded data blocks", + "metric": "prometheus_local_storage_memory_chunkdescs", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Loaded data blocks", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + ], - "datasource": "prometheus", + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Time series count", + "metric": "prometheus_local_storage_memory_series", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Time series total count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 47 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "samples/s {{instance}}", + "metric": "prometheus_local_storage_ingested_samples_total", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Samples Appended per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 62, + "panels": [ + + ], + "repeat": null, + "title": "Head block stats", + "type": "row" + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "To persist": "#9AC48A" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 55 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Max.*/", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Head chunk count", + "metric": "prometheus_local_storage_memory_chunks", + "refId": "A", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Head chunks count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 55 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Length of head block", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 55 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "created on {{ instance }}", + "refId": "B" + }, + { + "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "deleted on {{ instance }}", + "refId": "C" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Head Chunks Created/Deleted per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 63, + "panels": [ + + ], + "repeat": null, + "title": "Data maintenance", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 63 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "B" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Compaction duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 63 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} on {{ instance }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Go Garbage collection duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 63 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} on {{ instance }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "WAL truncate duration seconds", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 63 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "connected", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ quantile }} {{ instance }}", + "refId": "A" + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "WAL fsync duration seconds", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 64, + "panels": [ + + ], + "repeat": null, + "title": "RAM&CPU", + "type": "row" + }, + { + "aliasColors": { + "Allocated bytes": "#7EB26D", + "Allocated bytes - 1m max": "#BF1B00", + "Allocated bytes - 1m min": "#BF1B00", + "Allocated bytes - 5m max": "#BF1B00", + "Allocated bytes - 5m min": "#BF1B00", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "RSS": "#447EBC" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", "decimals": null, "editable": true, "error": false, @@ -31425,151 +34496,16 @@ items: ] }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 7, - "x": 17, - "y": 19 - }, - "id": 7, - "interval": null, - "isNew": true, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster Filesystem usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "alert": { - "alertRuleTags": { - - }, - "conditions": [ - { - "evaluator": { - "params": [ - 1 - ], - "type": "lt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "C", - "5m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "2m", - "frequency": "60s", - "handler": 1, - "name": "Node Down", - "noDataState": "alerting", - "notifications": [ - - ] - }, - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 4, - "w": 7, - "x": 17, - "y": 24 + "h": 7, + "w": 8, + "x": 0, + "y": 71 }, "hiddenSeries": false, - "id": 29, + "id": 6, "legend": { "avg": false, "current": false, @@ -31584,51 +34520,68 @@ items: "links": [ ], - "nullPointMode": "null as zero", - "options": { - "dataLinks": [ - - ] - }, + "nullPointMode": "null", "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ - + { + "alias": "/-/", + "fill": 0 + }, + { + "alias": "collector heap size", + "color": "#E0752D", + "fill": 0, + "linewidth": 2 + }, + { + "alias": "collector kubernetes memory limit", + "color": "#BF1B00", + "fill": 0, + "linewidth": 3 + } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)", + "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)", "format": "time_series", "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "C" + "intervalFactor": 2, + "legendFormat": "Total resident memory - {{instance}}", + "metric": "process_resident_memory_bytes", + "refId": "B", + "step": 1800 + }, + { + "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Total llocated bytes - {{instance}}", + "metric": "go_memstats_alloc_bytes", + "refId": "A", + "step": 1800 } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "lt", - "value": 1 - } + ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Up Nodes", + "title": "Memory", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -31643,16 +34596,14 @@ items: }, "yaxes": [ { - "$$hashKey": "object:14563", - "format": "short", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "$$hashKey": "object:14564", "format": "short", "label": null, "logBase": 1, @@ -31667,50 +34618,20 @@ items: } }, { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [ - - ], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Temperature alert", - "noDataState": "no_data", - "notifications": [ - - ] - }, "aliasColors": { - + "Allocated bytes": "#F9BA8F", + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833", + "RSS": "#890F02" }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": "$datasource", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { @@ -31724,19 +34645,19 @@ items: "fill": 1, "fillGradient": 0, "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 28 + "h": 7, + "w": 8, + "x": 8, + "y": 71 }, "hiddenSeries": false, - "id": 13, + "id": 7, "legend": { "avg": false, "current": false, "max": false, "min": false, - "show": true, + "show": false, "total": false, "values": false }, @@ -31746,12 +34667,8 @@ items: ], "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, "percentage": false, + "pluginVersion": "7.1.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -31763,31 +34680,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "rpi_cpu_temperature_celsius", + "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])", "format": "time_series", - "intervalFactor": 5, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "Allocated Bytes/s", + "metric": "go_memstats_alloc_bytes", + "refId": "A", + "step": 1800 } ], "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - } + ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "CPU Temperature", + "title": "Allocations per second", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -31802,11 +34716,11 @@ items: }, "yaxes": [ { - "format": "celsius", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -31824,723 +34738,431 @@ items: } }, { - "collapsed": false, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 71 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CPU/s", + "metric": "prometheus_local_storage_ingested_samples_total", + "refId": "B", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "CPU per second", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "avg" + ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 78 }, - "id": 17, + "id": 68, "panels": [ + { + "aliasColors": { + "Chunks": "#1F78C1", + "Chunks to persist": "#508642", + "Max chunks": "#052B51", + "Max to persist": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 79 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "7.1.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}", + "metric": "prometheus_local_storage_chunk_ops_total", + "refId": "M", + "step": 1800 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Net errors", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } ], - "title": "Pods", + "repeat": null, + "title": "Contrac errors", "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 0, - "fillGradient": 0, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 3, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 270, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "null as zero", - "options": { - "dataLinks": [ - - ] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod}}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Pod CPU usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 0, - "fillGradient": 0, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 250, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "options": { - "dataLinks": [ - - ] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))", - "format": "time_series", - "hide": true, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - }, - { - "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod }}", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Pod memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 550, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 240 - }, - { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Sent Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 - }, - "hiddenSeries": false, - "id": 21, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 150, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "null", - "options": { - "dataLinks": [ - - ] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "B", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Received Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 0, - "fillGradient": 0, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 56 - }, - "hiddenSeries": false, - "id": 8, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 220, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "options": { - "dataLinks": [ - - ] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Receive Traffic", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Transmit Traffic", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Pod Network i/o", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "$$hashKey": "object:1163", - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1164", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } } ], - "refresh": "10s", - "schemaVersion": 25, + "refresh": "5m", + "schemaVersion": 26, "style": "dark", "tags": [ - "custom", - "overview" + "custom" ], "templating": { "list": [ + { + "auto": true, + "auto_count": 30, + "auto_min": "2m", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_aggregation_interval" + }, + "hide": 0, + "label": "aggregation intarval", + "name": "aggregation_interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_aggregation_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "localhost:9090", + "value": "localhost:9090" + }, + "datasource": "$datasource", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [ + ], + "query": "label_values(prometheus_build_info, instance)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "text": "60", + "value": "60" + }, + "hide": 0, + "label": "Scrape interval seconds", + "name": "scrape_interval", + "options": [ + { + "text": "60", + "value": "60" + } + ], + "query": "60", + "skipUrlSync": false, + "type": "constant" + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Prometheus datasource", + "multi": false, + "name": "datasource", + "options": [ + + ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "No data sources found", + "value": "" + }, + "hide": 0, + "includeAll": false, + "label": "InfluxDB datasource", + "multi": false, + "name": "influx_datasource", + "options": [ + + ], + "query": "influxdb", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } ] }, "time": { @@ -32549,6 +35171,7 @@ items: }, "timepicker": { "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -32572,9 +35195,9 @@ items: ] }, "timezone": "browser", - "title": "⭐️ Kubernetes Monitoring Overview", - "uid": "", - "version": 1 + "title": "Prometheus Monitoring", + "uid": "XmsJC9mRz", + "version": 2 } kind: ConfigMap metadata: @@ -36623,7 +39246,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Proxy", - "uid": "", + "uid": "632e265de029684c40b21cb76bca4f94", "version": 0 } kind: ConfigMap @@ -37673,7 +40296,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Scheduler", - "uid": "", + "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 } kind: ConfigMap @@ -38584,7 +41207,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / StatefulSets", - "uid": "", + "uid": "a31c1f46e6f727cb37c0d731a7245005", "version": 0 } kind: ConfigMap @@ -39970,7 +42593,7 @@ items: }, "timezone": "UTC", "title": "Kubernetes / Networking / Workload", - "uid": "", + "uid": "728bf77cc1166d2f3133bf25846876cc", "version": 0 } kind: ConfigMap