diff --git a/grafana-dashboards/prometheus-dashboard.json b/grafana-dashboards/prometheus-dashboard.json
index c73f4d6..9e6f2d6 100644
--- a/grafana-dashboards/prometheus-dashboard.json
+++ b/grafana-dashboards/prometheus-dashboard.json
@@ -1,58 +1,126 @@
{
"annotations": {
- "list": [{
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- }]
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ },
+ {
+ "datasource": "$datasource",
+ "enable": true,
+ "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)",
+ "hide": false,
+ "iconColor": "rgb(250, 44, 18)",
+ "limit": 100,
+ "name": "downage",
+ "showIn": 0,
+ "step": "30s",
+ "tagKeys": "instance",
+ "textFormat": "prometheus down",
+ "titleFormat": "Downage",
+ "type": "alert"
+ },
+ {
+ "datasource": "$datasource",
+ "enable": true,
+ "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)",
+ "hide": false,
+ "iconColor": "#fceaca",
+ "limit": 100,
+ "name": "Reload",
+ "showIn": 0,
+ "step": "5m",
+ "tagKeys": "instance",
+ "tags": [],
+ "titleFormat": "Reload",
+ "type": "tags"
+ }
+ ]
},
- "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ",
+ "description": "Dashboard for monitoring of Prometheus v2.x.x",
"editable": true,
- "gnetId": 162,
+ "gnetId": 3681,
"graphTooltip": 1,
- "links": [],
- "panels": [{
+ "id": 4,
+ "iteration": 1596721016726,
+ "links": [
+ {
+ "icon": "info",
+ "tags": [],
+ "targetBlank": true,
+ "title": "Dashboard's GitHub",
+ "tooltip": "GitHub repo of this dashboard",
+ "type": "link",
+ "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard"
+ },
+ {
+ "icon": "doc",
+ "tags": [],
+ "targetBlank": true,
+ "title": "Prometheus Docs",
+ "tooltip": "",
+ "type": "link",
+ "url": "http://prometheus.io/docs/introduction/overview/"
+ }
+ ],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 55,
+ "panels": [],
+ "repeat": null,
+ "title": "Header instance info",
+ "type": "row"
+ },
+ {
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
- "rgba(50, 172, 45, 0.97)",
+ "#299c46",
"rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
+ "#bf1b00"
],
- "datasource": "prometheus",
- "editable": true,
- "error": false,
+ "datasource": "$datasource",
+ "decimals": 1,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
- "format": "percent",
+ "format": "s",
"gauge": {
- "maxValue": 100,
+ "maxValue": 1000000,
"minValue": 0,
- "show": true,
+ "show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
- "h": 7,
- "w": 8,
+ "h": 5,
+ "w": 4,
"x": 0,
- "y": 0
+ "y": 1
},
- "id": 4,
+ "id": 41,
"interval": null,
- "isNew": true,
"links": [],
"mappingType": 1,
- "mappingTypes": [{
+ "mappingTypes": [
+ {
"name": "value to text",
"value": 1
},
@@ -68,11 +136,13 @@
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
- "rangeMaps": [{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }],
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
@@ -80,42 +150,132 @@
"show": false
},
"tableColumn": "",
- "targets": [{
- "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }],
- "thresholds": "65, 90",
- "title": "Cluster memory usage",
+ "targets": [
+ {
+ "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
"type": "singlestat",
"valueFontSize": "80%",
- "valueMaps": [{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }],
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
"valueName": "current"
},
{
"cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
+ "colorBackground": false,
+ "colorValue": true,
"colors": [
- "rgba(0, 0, 0, 0)",
- "rgb(210, 1, 1)",
- "#890f02"
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#bf1b00"
],
- "datasource": "prometheus",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
- "format": "percentunit",
+ "format": "short",
+ "gauge": {
+ "maxValue": 1000000,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 8,
+ "x": 4,
+ "y": 1
+ },
+ "id": 42,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}",
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "500000,800000,1000000",
+ "title": "Total count of time series",
+ "type": "singlestat",
+ "valueFontSize": "150%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
@@ -124,16 +284,17 @@
"thresholdMarkers": true
},
"gridPos": {
- "h": 2,
- "w": 8,
- "x": 8,
- "y": 0
+ "h": 5,
+ "w": 3,
+ "x": 12,
+ "y": 1
},
- "id": 23,
+ "id": 48,
"interval": null,
"links": [],
"mappingType": 1,
- "mappingTypes": [{
+ "mappingTypes": [
+ {
"name": "value to text",
"value": 1
},
@@ -149,77 +310,79 @@
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
- "rangeMaps": [{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }],
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
- "tableColumn": "{job=\"kubelet\"}",
- "targets": [{
- "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)",
- "format": "time_series",
- "instant": true,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A"
- }],
- "thresholds": "1.1",
- "title": "Up Nodes",
+ "tableColumn": "version",
+ "targets": [
+ {
+ "expr": "prometheus_build_info{instance=\"$instance\"}",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Version",
"type": "singlestat",
- "valueFontSize": "120%",
- "valueMaps": [{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }],
- "valueName": "avg"
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "first"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
- "rgba(50, 172, 45, 0.97)",
+ "#299c46",
"rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
+ "#d44a3a"
],
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
+ "datasource": "$datasource",
+ "decimals": 2,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
- "format": "percent",
+ "format": "ms",
"gauge": {
"maxValue": 100,
"minValue": 0,
- "show": true,
+ "show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 0
+ "h": 5,
+ "w": 4,
+ "x": 15,
+ "y": 1
},
- "id": 6,
+ "id": 49,
"interval": null,
- "isNew": true,
"links": [],
"mappingType": 1,
- "mappingTypes": [{
+ "mappingTypes": [
+ {
"name": "value to text",
"value": 1
},
@@ -235,11 +398,129 @@
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
- "rangeMaps": [{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }],
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}",
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Actual head block length",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "content": "",
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 2,
+ "x": 19,
+ "y": 1
+ },
+ "height": "",
+ "id": 50,
+ "links": [],
+ "mode": "html",
+ "options": {
+ "content": "",
+ "mode": "html"
+ },
+ "pluginVersion": "7.1.0",
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#e6522c",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "$datasource",
+ "decimals": 1,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 3,
+ "x": 21,
+ "y": 1
+ },
+ "id": 52,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
@@ -247,206 +528,27 @@
"show": false
},
"tableColumn": "",
- "targets": [{
- "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }],
- "thresholds": "65, 90",
- "title": "Cluster CPU usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }],
- "valueName": "current"
- },
- {
- "columns": [],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "fontSize": "90%",
- "gridPos": {
- "h": 5,
- "w": 8,
- "x": 8,
- "y": 2
- },
- "id": 25,
- "links": [],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 2,
- "desc": false
- },
- "styles": [{
- "alias": "Time",
- "align": "auto",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
+ "targets": [
{
- "alias": "Uptime",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Value",
- "thresholds": [],
- "type": "number",
- "unit": "s"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/endpoint|job|namespace|pod|service/",
- "thresholds": [],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "instance",
- "preserveFormat": false,
- "sanitize": false,
- "thresholds": [],
- "type": "string",
- "unit": "short"
+ "expr": "2",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
}
],
- "targets": [{
- "expr": "(time() - node_boot_time_seconds)",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A"
- }],
- "title": "Node Uptime",
- "transform": "table",
+ "thresholds": "10,20",
+ "title": "",
"transparent": true,
- "type": "table-old"
- },
- {
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "mappings": [{
- "from": "",
- "id": 0,
- "operator": "",
- "text": "Up",
- "to": "",
- "type": 1,
- "value": "1"
- },
- {
- "from": "",
- "id": 1,
- "operator": "",
- "text": "Down",
- "to": "",
- "type": 1,
- "value": "0"
- },
- {
- "from": "",
- "id": 2,
- "operator": "",
- "text": "Down",
- "to": "",
- "type": 1,
- "value": "0.5"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [{
- "color": "rgb(0, 0, 0)",
- "value": null
- },
- {
- "color": "red",
- "value": 0
- },
- {
- "color": "green",
- "value": 1
- }
- ]
- }
- },
- "overrides": []
- },
- "gridPos": {
- "h": 2,
- "w": 24,
- "x": 0,
- "y": 7
- },
- "id": 35,
- "options": {
- "colorMode": "background",
- "graphMode": "area",
- "justifyMode": "auto",
- "orientation": "vertical",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": false
+ "type": "singlestat",
+ "valueFontSize": "200%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
}
- },
- "pluginVersion": "7.0.3",
- "repeat": null,
- "targets": [{
- "expr": "sort(avg by (job) (up{job=~\"kube-dns|kubelet|traefik.*|apiserver|kube-controller-manager|kube-scheduler\"}))",
- "format": "time_series",
- "instant": true,
- "interval": "",
- "legendFormat": "{{job}}",
- "refId": "A"
- }],
- "timeFrom": null,
- "timeShift": null,
- "title": "Kubernetes Core Services",
- "type": "stat"
+ ],
+ "valueName": "avg"
},
{
"collapsed": false,
@@ -455,51 +557,22 @@
"h": 1,
"w": 24,
"x": 0,
- "y": 9
+ "y": 6
},
- "id": 15,
+ "id": 56,
"panels": [],
- "title": "Nodes",
+ "repeat": null,
+ "title": "Main info",
"type": "row"
},
{
- "alert": {
- "conditions": [{
- "evaluator": {
- "params": [
- 0.85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "B",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [],
- "type": "max"
- },
- "type": "query"
- }],
- "executionErrorState": "alerting",
- "for": "0m",
- "frequency": "60s",
- "handler": 1,
- "name": "Memory Usage alert",
- "noDataState": "no_data",
- "notifications": []
- },
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {}
@@ -509,13 +582,1463 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
+ "h": 7,
+ "w": 8,
"x": 0,
- "y": 10
+ "y": 7
},
"hiddenSeries": false,
- "id": 10,
+ "id": 15,
+ "legend": {
+ "avg": true,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "max duration for {{slice}}",
+ "metric": "prometheus_local_storage_rushed_mode",
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Query elapsed time",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 17,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "created on {{ instance }}",
+ "metric": "prometheus_local_storage_maintain_series_duration_seconds_count",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "removed on {{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Head series created/deleted",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded_sample_limit on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "duplicate_timestamp on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "B",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "out_of_bounds on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "C",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "out_of_order on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "D",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "rule_evaluation_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "G",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "tsdb_compactions_failed on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "K",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "tsdb_reloads_failures on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "L",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "head_series_not_found on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "N",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "evaluator_iterations_missed on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "O",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "evaluator_iterations_skipped on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "P",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Prometheus errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 14
+ },
+ "id": 57,
+ "panels": [],
+ "repeat": null,
+ "title": "Scrape & rule duration",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 15
+ },
+ "hiddenSeries": false,
+ "id": 25,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": false,
+ "show": false,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval",
+ "format": "time_series",
+ "interval": "2m",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "metric": "",
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Scrape delay (counts with 1m scrape interval)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 15
+ },
+ "hiddenSeries": false,
+ "id": 14,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Queue length",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Queue length",
+ "metric": "prometheus_local_storage_indexing_queue_length",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Rule evaluation duration",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 22
+ },
+ "id": 58,
+ "panels": [],
+ "repeat": null,
+ "title": "Requests & queries",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 18,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ handler }} on {{ instance }}",
+ "metric": "",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Request count",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 16,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ handler }} on {{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Request duration per handler",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ handler }} in {{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Request size by handler",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Allocated bytes": "#F9BA8F",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max count collector": "#bf1b00",
+ "Max count harvester": "#bf1b00",
+ "Max to persist": "#3F6833",
+ "RSS": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/Max.*/",
+ "fill": 0,
+ "linewidth": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Current count ",
+ "metric": "last",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max count",
+ "metric": "last",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Count of concurrent queries",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 59,
+ "panels": [],
+ "repeat": null,
+ "title": "Alerting",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Alert queue capacity on o collector": "#bf1b00",
+ "Alert queue capacity on o harvester": "#bf1b00",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 31
+ },
+ "hiddenSeries": false,
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*capacity.*/",
+ "fill": 0,
+ "linewidth": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Alert queue capacity ",
+ "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Alert queue size on {{ instance }}",
+ "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Alert queue size",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 31
+ },
+ "hiddenSeries": false,
+ "id": 21,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Discovered alertmanagers on {{ instance }}",
+ "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Count of discovered alertmanagers",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 31
+ },
+ "hiddenSeries": false,
+ "id": 39,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "notifications_dropped on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "F",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "rule_evaluation_failures on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Alerting errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 38
+ },
+ "id": 60,
+ "panels": [],
+ "repeat": null,
+ "title": "Service discovery",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 43,
"legend": {
"avg": false,
"current": false,
@@ -529,10 +2052,8 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
- "options": {
- "dataLinks": []
- },
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -540,35 +2061,468 @@
"spaceLength": 10,
"stack": false,
"steppedLine": false,
- "targets": [{
- "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)",
+ "targets": [
+ {
+ "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Consul target sync count",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Consul SD sync count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 44,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Marathon target sync count",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Marathon SD sync count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 45,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\", instance=\"$instance\"}[$aggregation_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Kubernetes target sync count",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Kubernetes SD sync count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 46,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded_sample_limit on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_file_read_error on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "E",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_consul_rpc_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "H",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_marathon_refresh_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "I",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_openstack_refresh_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "J",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Service discovery errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 46
+ },
+ "id": 61,
+ "panels": [],
+ "repeat": null,
+ "title": "TSDB stats",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 47
+ },
+ "hiddenSeries": false,
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ instance }}",
"refId": "A"
- },
- {
- "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes",
- "format": "time_series",
- "hide": true,
- "intervalFactor": 1,
- "refId": "B"
}
],
- "thresholds": [{
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 0.85
- }],
+ "thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "Memory Usage",
+ "title": "Reloaded block from disk",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -579,8 +2533,9 @@
"show": true,
"values": []
},
- "yaxes": [{
- "format": "decbytes",
+ "yaxes": [
+ {
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -602,42 +2557,18 @@
}
},
{
- "alert": {
- "conditions": [{
- "evaluator": {
- "params": [
- 90
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "15m",
- "now"
- ]
- },
- "reducer": {
- "params": [],
- "type": "max"
- },
- "type": "query"
- }],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Usage alert",
- "noDataState": "no_data",
- "notifications": []
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
},
- "aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {}
@@ -647,19 +2578,19 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 10
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 47
},
"hiddenSeries": false,
- "id": 11,
+ "id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "show": false,
"total": false,
"values": false
},
@@ -667,10 +2598,8 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
- "options": {
- "dataLinks": []
- },
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -678,24 +2607,446 @@
"spaceLength": 10,
"stack": false,
"steppedLine": false,
- "targets": [{
- "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)",
- "format": "time_series",
- "intervalFactor": 3,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }],
- "thresholds": [{
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 90
- }],
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Loaded data blocks",
+ "metric": "prometheus_local_storage_memory_chunkdescs",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "CPU Usage",
+ "title": "Loaded data blocks",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 47
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Time series count",
+ "metric": "prometheus_local_storage_memory_series",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time series total count",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 47
+ },
+ "hiddenSeries": false,
+ "id": 1,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "samples/s {{instance}}",
+ "metric": "prometheus_local_storage_ingested_samples_total",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Samples Appended per second",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 54
+ },
+ "id": 62,
+ "panels": [],
+ "repeat": null,
+ "title": "Head block stats",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833",
+ "To persist": "#9AC48A"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 55
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/Max.*/",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Head chunk count",
+ "metric": "prometheus_local_storage_memory_chunks",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Head chunks count",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 55
+ },
+ "hiddenSeries": false,
+ "id": 35,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Length of head block",
"tooltip": {
"shared": true,
"sort": 0,
@@ -709,9 +3060,9 @@
"show": true,
"values": []
},
- "yaxes": [{
- "decimals": null,
- "format": "percent",
+ "yaxes": [
+ {
+ "format": "ms",
"label": null,
"logBase": 1,
"max": null,
@@ -733,124 +3084,537 @@
}
},
{
- "columns": [],
- "datasource": "prometheus",
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
- "fontSize": "100%",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 19
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 55
},
- "id": 31,
+ "hiddenSeries": false,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 15,
- "desc": true
- },
- "styles": [{
- "$$hashKey": "object:5708",
- "alias": "Time",
- "align": "auto",
- "dateFormat": "HH:mm",
- "link": false,
- "pattern": "Time",
- "type": "date"
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "created on {{ instance }}",
+ "refId": "B"
},
{
- "$$hashKey": "object:5709",
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/__name__|endpoint|fstype|alertstate|long|mountpoint|namespace|Value|short|pod|service|job/",
- "thresholds": [],
- "type": "hidden",
- "unit": "short"
- },
- {
- "$$hashKey": "object:5710",
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "number",
- "unit": "short"
+ "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "deleted on {{ instance }}",
+ "refId": "C"
}
],
- "targets": [{
- "expr": "ALERTS{alertstate=\"firing\", alertname!=\"Watchdog\"}",
- "format": "table",
- "instant": true,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A"
- }],
- "title": "Active Alerts",
- "transform": "table",
- "type": "table-old"
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Head Chunks Created/Deleted per second",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
},
{
- "dashboardFilter": "",
- "dashboardTags": [],
+ "collapsed": false,
"datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 62
+ },
+ "id": 63,
+ "panels": [],
+ "repeat": null,
+ "title": "Data maintenance",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
- "folderId": null,
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 5,
- "x": 12,
- "y": 19
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 63
},
- "id": 27,
- "limit": 10,
+ "hiddenSeries": false,
+ "id": 33,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [],
- "nameFilter": "",
- "onlyAlertsOnDashboard": false,
- "show": "current",
- "sortOrder": 1,
- "stateFilter": [],
- "title": "Alarms",
- "type": "alertlist"
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Compaction duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 34,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ quantile }} on {{ instance }}",
+ "refId": "A"
+ }
],
- "datasource": "prometheus",
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TSDB head garbage collection duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 37,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ quantile }} on {{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "WAL truncate duration seconds",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 38,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ quantile }} {{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "WAL fsync duration seconds",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 70
+ },
+ "id": 64,
+ "panels": [],
+ "repeat": null,
+ "title": "RAM&CPU",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Allocated bytes": "#7EB26D",
+ "Allocated bytes - 1m max": "#BF1B00",
+ "Allocated bytes - 1m min": "#BF1B00",
+ "Allocated bytes - 5m max": "#BF1B00",
+ "Allocated bytes - 5m min": "#BF1B00",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833",
+ "RSS": "#447EBC"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
"decimals": null,
"editable": true,
"error": false,
@@ -860,128 +3624,16 @@
},
"overrides": []
},
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 7,
- "x": 17,
- "y": 19
- },
- "id": 7,
- "interval": null,
- "isNew": true,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [{
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [{
- "from": "null",
- "text": "N/A",
- "to": "null"
- }],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [{
- "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "metric": "",
- "refId": "A",
- "step": 10
- }],
- "thresholds": "65, 90",
- "title": "Cluster Filesystem usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [{
- "op": "=",
- "text": "N/A",
- "value": "null"
- }],
- "valueName": "current"
- },
- {
- "alert": {
- "alertRuleTags": {},
- "conditions": [{
- "evaluator": {
- "params": [
- 1
- ],
- "type": "lt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "C",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [],
- "type": "avg"
- },
- "type": "query"
- }],
- "executionErrorState": "alerting",
- "for": "2m",
- "frequency": "60s",
- "handler": 1,
- "name": "Node Down",
- "noDataState": "alerting",
- "notifications": []
- },
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 4,
- "w": 7,
- "x": 17,
- "y": 24
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 71
},
"hiddenSeries": false,
- "id": 29,
+ "id": 6,
"legend": {
"avg": false,
"current": false,
@@ -994,41 +3646,64 @@
"lines": true,
"linewidth": 1,
"links": [],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": []
- },
+ "nullPointMode": "null",
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
- "seriesOverrides": [],
+ "seriesOverrides": [
+ {
+ "alias": "/-/",
+ "fill": 0
+ },
+ {
+ "alias": "collector heap size",
+ "color": "#E0752D",
+ "fill": 0,
+ "linewidth": 2
+ },
+ {
+ "alias": "collector kubernetes memory limit",
+ "color": "#BF1B00",
+ "fill": 0,
+ "linewidth": 3
+ }
+ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
- "targets": [{
- "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "C"
- }],
- "thresholds": [{
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "lt",
- "value": 1
- }],
+ "targets": [
+ {
+ "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Total resident memory - {{instance}}",
+ "metric": "process_resident_memory_bytes",
+ "refId": "B",
+ "step": 1800
+ },
+ {
+ "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Total allocated bytes - {{instance}}",
+ "metric": "go_memstats_alloc_bytes",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "Up Nodes",
+ "title": "Memory",
"tooltip": {
+ "msResolution": false,
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -1039,17 +3714,16 @@
"show": true,
"values": []
},
- "yaxes": [{
- "$$hashKey": "object:14563",
- "format": "short",
+ "yaxes": [
+ {
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": "0",
"show": true
},
{
- "$$hashKey": "object:14564",
"format": "short",
"label": null,
"logBase": 1,
@@ -1064,42 +3738,20 @@
}
},
{
- "alert": {
- "conditions": [{
- "evaluator": {
- "params": [
- 85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "1m",
- "now"
- ]
- },
- "reducer": {
- "params": [],
- "type": "avg"
- },
- "type": "query"
- }],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Temperature alert",
- "noDataState": "no_data",
- "notifications": []
+ "aliasColors": {
+ "Allocated bytes": "#F9BA8F",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833",
+ "RSS": "#890F02"
},
- "aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {}
@@ -1109,19 +3761,19 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 6,
- "w": 24,
- "x": 0,
- "y": 28
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 71
},
"hiddenSeries": false,
- "id": 13,
+ "id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "show": false,
"total": false,
"values": false
},
@@ -1129,10 +3781,8 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
- "options": {
- "dataLinks": []
- },
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -1140,27 +3790,26 @@
"spaceLength": 10,
"stack": false,
"steppedLine": false,
- "targets": [{
- "expr": "rpi_cpu_temperature_celsius",
- "format": "time_series",
- "intervalFactor": 5,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }],
- "thresholds": [{
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 85
- }],
+ "targets": [
+ {
+ "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Allocated Bytes/s",
+ "metric": "go_memstats_alloc_bytes",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "CPU Temperature",
+ "title": "Allocations per second",
"tooltip": {
+ "msResolution": false,
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -1171,12 +3820,13 @@
"show": true,
"values": []
},
- "yaxes": [{
- "format": "celsius",
+ "yaxes": [
+ {
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": "0",
"show": true
},
{
@@ -1194,609 +3844,396 @@
}
},
{
- "collapsed": false,
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 71
+ },
+ "hiddenSeries": false,
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "CPU/s",
+ "metric": "prometheus_local_storage_ingested_samples_total",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU per second",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ "avg"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
- "y": 34
+ "y": 78
},
- "id": 17,
- "panels": [],
- "title": "Pods",
+ "id": 68,
+ "panels": [
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 79
+ },
+ "hiddenSeries": false,
+ "id": 47,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "M",
+ "step": 1800
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Net errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Conntrack errors",
"type": "row"
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {},
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 35
- },
- "hiddenSeries": false,
- "id": 3,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 270,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": []
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [{
- "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))",
- "format": "time_series",
- "instant": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod}}",
- "metric": "container_cpu",
- "refId": "A",
- "step": 10
- }],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Pod CPU usage",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [{
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {},
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 42
- },
- "hiddenSeries": false,
- "id": 2,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 250,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": []
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [{
- "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))",
- "format": "time_series",
- "hide": true,
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "metric": "container_memory_usage:sort_desc",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "refId": "B"
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Pod memory usage",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [{
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {},
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 49
- },
- "hiddenSeries": false,
- "id": 19,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": 550,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "dataLinks": []
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [{
- "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ pod_name }}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Sent Network Traffic per Container",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [{
- "format": "Bps",
- "label": "",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": "",
- "logBase": 10,
- "max": 8,
- "min": 0,
- "show": false
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {},
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 49
- },
- "hiddenSeries": false,
- "id": 21,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": 150,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "dataLinks": []
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [{
- "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Received Network Traffic per Container",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [{
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {},
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 56
- },
- "hiddenSeries": false,
- "id": 8,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 220,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": []
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [{
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Receive Traffic",
- "metric": "network",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Transmit Traffic",
- "metric": "network",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Pod Network i/o",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [{
- "$$hashKey": "object:1163",
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "$$hashKey": "object:1164",
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
}
],
- "refresh": "10s",
- "schemaVersion": 25,
+ "refresh": "5m",
+ "schemaVersion": 26,
"style": "dark",
"tags": [
- "custom",
- "overview"
+ "custom"
],
"templating": {
- "list": []
+ "list": [
+ {
+ "auto": true,
+ "auto_count": 30,
+ "auto_min": "2m",
+ "current": {
+ "selected": false,
+ "text": "auto",
+ "value": "$__auto_interval_aggregation_interval"
+ },
+ "hide": 0,
+ "label": "aggregation interval",
+ "name": "aggregation_interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "auto",
+ "value": "$__auto_interval_aggregation_interval"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "localhost:9090",
+ "value": "localhost:9090"
+ },
+ "datasource": "$datasource",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Instance",
+ "multi": false,
+ "name": "instance",
+ "options": [],
+ "query": "label_values(prometheus_build_info, instance)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 2,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "current": {
+ "text": "60",
+ "value": "60"
+ },
+ "hide": 0,
+ "label": "Scrape interval seconds",
+ "name": "scrape_interval",
+ "options": [
+ {
+ "text": "60",
+ "value": "60"
+ }
+ ],
+ "query": "60",
+ "skipUrlSync": false,
+ "type": "constant"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "Prometheus datasource",
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "No data sources found",
+ "value": ""
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "InfluxDB datasource",
+ "multi": false,
+ "name": "influx_datasource",
+ "options": [],
+ "query": "influxdb",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ }
+ ]
},
"time": {
"from": "now-3h",
@@ -1804,6 +4241,7 @@
},
"timepicker": {
"refresh_intervals": [
+ "5s",
"10s",
"30s",
"1m",
@@ -1827,6 +4265,7 @@
]
},
"timezone": "browser",
- "title": "⭐️ Kubernetes Monitoring Overview",
- "version": 1
-}
+ "title": "Prometheus Monitoring",
+ "uid": "XmsJC9mRz",
+ "version": 2
+}
\ No newline at end of file
diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml
index 714b351..e7a908e 100644
--- a/manifests/grafana-dashboardDefinitions.yaml
+++ b/manifests/grafana-dashboardDefinitions.yaml
@@ -2015,7 +2015,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / API server",
- "uid": "",
+ "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b",
"version": 0
}
kind: ConfigMap
@@ -3840,7 +3840,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Networking / Cluster",
- "uid": "",
+ "uid": "ff635a025bcfea7bc3dd4f508990a3e9",
"version": 0
}
kind: ConfigMap
@@ -4966,7 +4966,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Controller Manager",
- "uid": "",
+ "uid": "72e0e05bef5099e5f049b05fdc429ed4",
"version": 0
}
kind: ConfigMap
@@ -6445,7 +6445,6 @@ items:
},
"timezone": "utc",
"title": "CoreDNS",
- "uid": "",
"version": 1
}
kind: ConfigMap
@@ -8988,7 +8987,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Cluster",
- "uid": "",
+ "uid": "efa86fd1d0c121a26444b636a3f509a8",
"version": 0
}
kind: ConfigMap
@@ -11236,7 +11235,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Pods)",
- "uid": "",
+ "uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
}
kind: ConfigMap
@@ -12183,7 +12182,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Node (Pods)",
- "uid": "",
+ "uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
"version": 0
}
kind: ConfigMap
@@ -13924,7 +13923,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Pod",
- "uid": "",
+ "uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0
}
kind: ConfigMap
@@ -15923,7 +15922,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Workload",
- "uid": "",
+ "uid": "a164a7f0339f99e89cea5cb47e9be617",
"version": 0
}
kind: ConfigMap
@@ -18078,7 +18077,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Workloads)",
- "uid": "",
+ "uid": "a87fb0d919ec0ea5f6543124e16c42a5",
"version": 0
}
kind: ConfigMap
@@ -20577,7 +20576,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Kubelet",
- "uid": "",
+ "uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0
}
kind: ConfigMap
@@ -22666,7 +22665,6 @@ items:
},
"timezone": "browser",
"title": "Kubernetes cluster monitoring (via Prometheus)",
- "uid": "",
"version": 1
}
kind: ConfigMap
@@ -24079,7 +24077,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Networking / Namespace (Pods)",
- "uid": "",
+ "uid": "8b7a8b326d7a6f1f04244066368c67af",
"version": 0
}
kind: ConfigMap
@@ -25760,7 +25758,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Networking / Namespace (Workload)",
- "uid": "",
+ "uid": "bbb2a765a623ae38130206c7d94a160f",
"version": 0
}
kind: ConfigMap
@@ -26708,7 +26706,7 @@ items:
},
"timezone": "UTC",
"title": "USE Method / Cluster",
- "uid": "",
+ "uid": "3e97d1d02672cdd0861f4c97c64f89b2",
"version": 0
}
kind: ConfigMap
@@ -27683,7 +27681,7 @@ items:
},
"timezone": "UTC",
"title": "USE Method / Node",
- "uid": "",
+ "uid": "fac67cfbe174d3ef53eb473d73d9212f",
"version": 0
}
kind: ConfigMap
@@ -28661,7 +28659,7 @@ items:
},
"timezone": "UTC",
"title": "Nodes",
- "uid": "",
+ "uid": "fa49a4706d07a042595b664c87fb33ea",
"version": 0
}
kind: ConfigMap
@@ -29220,7 +29218,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Persistent Volumes",
- "uid": "",
+ "uid": "919b92a8e8041bd567af9edab12c840c",
"version": 0
}
kind: ConfigMap
@@ -30400,7 +30398,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Networking / Pod",
- "uid": "",
+ "uid": "7a18067ce943a40ae25454675c19ff5c",
"version": 0
}
kind: ConfigMap
@@ -30421,29 +30419,100 @@ items:
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
+ },
+ {
+ "datasource": "$datasource",
+ "enable": true,
+ "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)",
+ "hide": false,
+ "iconColor": "rgb(250, 44, 18)",
+ "limit": 100,
+ "name": "downage",
+ "showIn": 0,
+ "step": "30s",
+ "tagKeys": "instance",
+ "textFormat": "prometheus down",
+ "titleFormat": "Downage",
+ "type": "alert"
+ },
+ {
+ "datasource": "$datasource",
+ "enable": true,
+ "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)",
+ "hide": false,
+ "iconColor": "#fceaca",
+ "limit": 100,
+ "name": "Reload",
+ "showIn": 0,
+ "step": "5m",
+ "tagKeys": "instance",
+ "tags": [
+
+ ],
+ "titleFormat": "Reload",
+ "type": "tags"
}
]
},
- "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ",
+ "description": "Dashboard for monitoring of Prometheus v2.x.x",
"editable": true,
- "gnetId": 162,
+ "gnetId": 3681,
"graphTooltip": 1,
+ "id": 4,
+ "iteration": 1596721016726,
"links": [
+ {
+ "icon": "info",
+ "tags": [
+ ],
+ "targetBlank": true,
+ "title": "Dashboard's Github ",
+ "tooltip": "Github repo of this dashboard",
+ "type": "link",
+ "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard"
+ },
+ {
+ "icon": "doc",
+ "tags": [
+
+ ],
+ "targetBlank": true,
+ "title": "Prometheus Docs",
+ "tooltip": "",
+ "type": "link",
+ "url": "http://prometheus.io/docs/introduction/overview/"
+ }
],
"panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 55,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "Header instance info",
+ "type": "row"
+ },
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
- "rgba(50, 172, 45, 0.97)",
+ "#299c46",
"rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
+ "#bf1b00"
],
- "datasource": "prometheus",
- "editable": true,
- "error": false,
+ "datasource": "$datasource",
+ "decimals": 1,
"fieldConfig": {
"defaults": {
"custom": {
@@ -30454,23 +30523,22 @@ items:
]
},
- "format": "percent",
+ "format": "s",
"gauge": {
- "maxValue": 100,
+ "maxValue": 1000000,
"minValue": 0,
- "show": true,
+ "show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
- "h": 7,
- "w": 8,
+ "h": 5,
+ "w": 4,
"x": 0,
- "y": 0
+ "y": 1
},
- "id": 4,
+ "id": 41,
"interval": null,
- "isNew": true,
"links": [
],
@@ -30508,16 +30576,15 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100",
+ "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})",
"format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
+ "instant": false,
+ "intervalFactor": 2,
+ "refId": "A"
}
],
- "thresholds": "65, 90",
- "title": "Cluster memory usage",
+ "thresholds": "",
+ "title": "Uptime",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
@@ -30531,14 +30598,14 @@ items:
},
{
"cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
+ "colorBackground": false,
+ "colorValue": true,
"colors": [
- "rgba(0, 0, 0, 0)",
- "rgb(210, 1, 1)",
- "#890f02"
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#bf1b00"
],
- "datasource": "prometheus",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
@@ -30549,7 +30616,100 @@ items:
]
},
- "format": "percentunit",
+ "format": "short",
+ "gauge": {
+ "maxValue": 1000000,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 8,
+ "x": 4,
+ "y": 1
+ },
+ "id": 42,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}",
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "500000,800000,1000000",
+ "title": "Total count of time series",
+ "type": "singlestat",
+ "valueFontSize": "150%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
@@ -30558,12 +30718,12 @@ items:
"thresholdMarkers": true
},
"gridPos": {
- "h": 2,
- "w": 8,
- "x": 8,
- "y": 0
+ "h": 5,
+ "w": 3,
+ "x": 12,
+ "y": 1
},
- "id": 23,
+ "id": 48,
"interval": null,
"links": [
@@ -30599,22 +30759,242 @@ items:
"lineColor": "rgb(31, 120, 193)",
"show": false
},
- "tableColumn": "{job=\"kubelet\"}",
+ "tableColumn": "version",
"targets": [
{
- "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)",
- "format": "time_series",
+ "expr": "prometheus_build_info{instance=\"$instance\"}",
+ "format": "table",
"instant": true,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
+ "intervalFactor": 2,
"refId": "A"
}
],
- "thresholds": "1.1",
- "title": "Up Nodes",
+ "thresholds": "",
+ "title": "Version",
"type": "singlestat",
- "valueFontSize": "120%",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "first"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "decimals": 2,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "format": "ms",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 15,
+ "y": 1
+ },
+ "id": 49,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}",
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Actual head block length",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "content": "",
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 2,
+ "x": 19,
+ "y": 1
+ },
+ "height": "",
+ "id": 50,
+ "links": [
+
+ ],
+ "mode": "html",
+ "options": {
+ "content": "",
+ "mode": "html"
+ },
+ "pluginVersion": "7.1.0",
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#e6522c",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "$datasource",
+ "decimals": 1,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 3,
+ "x": 21,
+ "y": 1
+ },
+ "id": 52,
+ "interval": null,
+ "links": [
+
+ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "2",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "10,20",
+ "title": "",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "200%",
"valueMaps": [
{
"op": "=",
@@ -30625,16 +31005,30 @@ items:
"valueName": "avg"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 56,
+ "panels": [
+
],
- "datasource": "prometheus",
- "decimals": 0,
+ "repeat": null,
+ "title": "Main info",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
"editable": true,
"error": false,
"fieldConfig": {
@@ -30647,287 +31041,434 @@ items:
]
},
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 7
},
+ "hiddenSeries": false,
+ "id": 15,
+ "legend": {
+ "avg": true,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "max duration for {{slice}}",
+ "metric": "prometheus_local_storage_rushed_mode",
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Query elapsed time",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 17,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "created on {{ instance }}",
+ "metric": "prometheus_local_storage_maintain_series_duration_seconds_count",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "removed on {{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Head series created/deleted",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
- "y": 0
- },
- "id": 6,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster CPU usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "columns": [
-
- ],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fontSize": "90%",
- "gridPos": {
- "h": 5,
- "w": 8,
- "x": 8,
- "y": 2
- },
- "id": 25,
- "links": [
-
- ],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 2,
- "desc": false
- },
- "styles": [
- {
- "alias": "Time",
- "align": "auto",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
- {
- "alias": "Uptime",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Value",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "s"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/endpoint|job|namespace|pod|service/",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "instance",
- "preserveFormat": false,
- "sanitize": false,
- "thresholds": [
-
- ],
- "type": "string",
- "unit": "short"
- }
- ],
- "targets": [
- {
- "expr": "(time() - node_boot_time_seconds)",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A"
- }
- ],
- "title": "Node Uptime",
- "transform": "table",
- "transparent": true,
- "type": "table-old"
- },
- {
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- },
- "mappings": [
- {
- "from": "",
- "id": 0,
- "operator": "",
- "text": "Up",
- "to": "",
- "type": 1,
- "value": "1"
- },
- {
- "from": "",
- "id": 1,
- "operator": "",
- "text": "Down",
- "to": "",
- "type": 1,
- "value": "0"
- },
- {
- "from": "",
- "id": 2,
- "operator": "",
- "text": "Down",
- "to": "",
- "type": 1,
- "value": "0.5"
- }
- ],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "rgb(0, 0, 0)",
- "value": null
- },
- {
- "color": "red",
- "value": 0
- },
- {
- "color": "green",
- "value": 1
- }
- ]
- }
- },
- "overrides": [
-
- ]
- },
- "gridPos": {
- "h": 2,
- "w": 24,
- "x": 0,
"y": 7
},
- "id": 35,
- "options": {
- "colorMode": "background",
- "graphMode": "area",
- "justifyMode": "auto",
- "orientation": "vertical",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": false
- }
+ "hiddenSeries": false,
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
},
- "pluginVersion": "7.0.3",
- "repeat": null,
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "sort(avg by (job) (up{job=~\"kube-dns|kubelet|traefik.*|apiserver|kube-controller-manager|kube-scheduler\"}))",
+ "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
"format": "time_series",
- "instant": true,
"interval": "",
- "legendFormat": "{{job}}",
- "refId": "A"
+ "intervalFactor": 2,
+ "legendFormat": "exceeded_sample_limit on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "duplicate_timestamp on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "B",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "out_of_bounds on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "C",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "out_of_order on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "D",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "rule_evaluation_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "G",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "tsdb_compactions_failed on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "K",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "tsdb_reloads_failures on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "L",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "head_series_not_found on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "N",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "evaluator_iterations_missed on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "O",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "evaluator_iterations_skipped on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "P",
+ "step": 1800
}
+ ],
+ "thresholds": [
+
],
"timeFrom": null,
+ "timeRegions": [
+
+ ],
"timeShift": null,
- "title": "Kubernetes Core Services",
- "type": "stat"
+ "title": "Prometheus errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
},
{
"collapsed": false,
@@ -30936,61 +31477,150 @@ items:
"h": 1,
"w": 24,
"x": 0,
- "y": 9
+ "y": 14
},
- "id": 15,
+ "id": 57,
"panels": [
],
- "title": "Nodes",
+ "repeat": null,
+ "title": "Scrape & rule duration",
"type": "row"
},
{
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 0.85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "B",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "max"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "for": "0m",
- "frequency": "60s",
- "handler": 1,
- "name": "Memory Usage alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
+ "datasource": "$datasource",
+ "description": "",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "grid": {
+
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 15
+ },
+ "hiddenSeries": false,
+ "id": 25,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": false,
+ "show": false,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+
+ ],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval",
+ "format": "time_series",
+ "interval": "2m",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "metric": "",
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Scrape delay (counts with 1m scrape interval)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {
@@ -31004,19 +31634,19 @@ items:
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 9,
+ "h": 7,
"w": 12,
- "x": 0,
- "y": 10
+ "x": 12,
+ "y": 15
},
"hiddenSeries": false,
- "id": 10,
+ "id": 14,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "show": false,
"total": false,
"values": false
},
@@ -31026,12 +31656,146 @@ items:
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Queue length",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Queue length",
+ "metric": "prometheus_local_storage_indexing_queue_length",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Rule evaulation duration",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
]
},
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 22
+ },
+ "id": 58,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "Requests & queries",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 18,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -31043,38 +31807,147 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)",
+ "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "A"
+ "legendFormat": "{{ handler }} on {{ instance }}",
+ "metric": "",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Request count",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 16,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0",
"format": "time_series",
- "hide": true,
- "intervalFactor": 1,
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ handler }} on {{ instance }}",
"refId": "B"
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 0.85
- }
+
],
"timeFrom": null,
"timeRegions": [
],
"timeShift": null,
- "title": "Memory Usage",
+ "title": "Request duration per handler",
"tooltip": {
+ "msResolution": false,
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -31089,11 +31962,11 @@ items:
},
"yaxes": [
{
- "format": "decbytes",
+ "format": "µs",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": "0",
"show": true
},
{
@@ -31111,50 +31984,18 @@ items:
}
},
{
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 90
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "15m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "max"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Usage alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
"aliasColors": {
-
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {
@@ -31168,13 +32009,674 @@ items:
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
+ "h": 7,
+ "w": 6,
"x": 12,
- "y": 10
+ "y": 23
},
"hiddenSeries": false,
- "id": 11,
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{ handler }} in {{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Request size by handler",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Allocated bytes": "#F9BA8F",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max count collector": "#bf1b00",
+ "Max count harvester": "#bf1b00",
+ "Max to persist": "#3F6833",
+ "RSS": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/Max.*/",
+ "fill": 0,
+ "linewidth": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Current count ",
+ "metric": "last",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max count",
+ "metric": "last",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Count of concurrent queries",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 59,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "Alerting",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Alert queue capacity on o collector": "#bf1b00",
+ "Alert queue capacity on o harvester": "#bf1b00",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 31
+ },
+ "hiddenSeries": false,
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*capacity.*/",
+ "fill": 0,
+ "linewidth": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Alert queue capacity ",
+ "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Alert queue size on {{ instance }}",
+ "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Alert queue size",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 31
+ },
+ "hiddenSeries": false,
+ "id": 21,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Discovered alertmanagers",
+ "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Count of discovered alertmanagers",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 31
+ },
+ "hiddenSeries": false,
+ "id": 39,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "notifications_dropped on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "F",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "rule_evaluation_failures on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Alerting errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 38
+ },
+ "id": 60,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "Service discovery",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 43,
"legend": {
"avg": false,
"current": false,
@@ -31190,12 +32692,8 @@ items:
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -31207,28 +32705,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)",
+ "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])",
"format": "time_series",
- "intervalFactor": 3,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "intervalFactor": 2,
+ "legendFormat": "Consul target sync count",
+ "refId": "A",
+ "step": 240
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 90
- }
+
],
"timeFrom": null,
"timeRegions": [
],
"timeShift": null,
- "title": "CPU Usage",
+ "title": "Consul SD sync count",
"tooltip": {
"shared": true,
"sort": 0,
@@ -31246,8 +32739,7 @@ items:
},
"yaxes": [
{
- "decimals": null,
- "format": "percent",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -31269,10 +32761,13 @@ items:
}
},
{
- "columns": [
+ "aliasColors": {
- ],
- "datasource": "prometheus",
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
@@ -31283,93 +32778,509 @@ items:
]
},
- "fontSize": "100%",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 19
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 39
},
- "id": 31,
+ "hiddenSeries": false,
+ "id": 44,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 15,
- "desc": true
- },
- "styles": [
- {
- "$$hashKey": "object:5708",
- "alias": "Time",
- "align": "auto",
- "dateFormat": "HH:mm",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
- {
- "$$hashKey": "object:5709",
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/__name__|endpoint|fstype|alertstate|long|mountpoint|namespace|Value|short|pod|service|job/",
- "thresholds": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "$$hashKey": "object:5710",
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- }
],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "ALERTS{alertstate=\"firing\", alertname!=\"Watchdog\"}",
- "format": "table",
- "instant": true,
+ "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Marathon target sync count",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Marathon SD sync count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 45,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\", instance=\"$instance\"}[$aggregation_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Kubernetes target sync count",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Kubernetes SD sync count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 46,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
"interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded_sample_limit on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "A",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_file_read_error on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "E",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_consul_rpc_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "H",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_marathon_refresh_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "I",
+ "step": 1800
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "sd_openstack_refresh_failure on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "J",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Service discovery errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 46
+ },
+ "id": 61,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "TSDB stats",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 47
+ },
+ "hiddenSeries": false,
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }}",
"refId": "A"
}
],
- "title": "Active Alerts",
- "transform": "table",
- "type": "table-old"
- },
- {
- "dashboardFilter": "",
- "dashboardTags": [
+ "thresholds": [
],
- "datasource": null,
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Reloaded block from disk",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {
@@ -31380,38 +33291,1198 @@ items:
]
},
- "folderId": null,
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 9,
- "w": 5,
- "x": 12,
- "y": 19
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 47
},
- "id": 27,
- "limit": 10,
+ "hiddenSeries": false,
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
],
- "nameFilter": "",
- "onlyAlertsOnDashboard": false,
- "show": "current",
- "sortOrder": 1,
- "stateFilter": [
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
],
- "title": "Alarms",
- "type": "alertlist"
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Loaded data blocks",
+ "metric": "prometheus_local_storage_memory_chunkdescs",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Loaded data blocks",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 47
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
],
- "datasource": "prometheus",
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Time series count",
+ "metric": "prometheus_local_storage_memory_series",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Time series total count",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 47
+ },
+ "hiddenSeries": false,
+ "id": 1,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "samples/s {{instance}}",
+ "metric": "prometheus_local_storage_ingested_samples_total",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Samples Appended per second",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 54
+ },
+ "id": 62,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "Head block stats",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833",
+ "To persist": "#9AC48A"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 55
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/Max.*/",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Head chunk count",
+ "metric": "prometheus_local_storage_memory_chunks",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Head chunks count",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 55
+ },
+ "hiddenSeries": false,
+ "id": 35,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Length of head block",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 55
+ },
+ "hiddenSeries": false,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "created on {{ instance }}",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "deleted on {{ instance }}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Head Chunks Created/Deleted per second",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 62
+ },
+ "id": 63,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "Data maintenance",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 0,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 33,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ instance }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Compaction duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 6,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 34,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ quantile }} on {{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Head GC duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 12,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 37,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ quantile }} on {{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "WAL truncate duration seconds",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 18,
+ "y": 63
+ },
+ "hiddenSeries": false,
+ "id": 38,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ quantile }} {{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "WAL fsync duration seconds",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 70
+ },
+ "id": 64,
+ "panels": [
+
+ ],
+ "repeat": null,
+ "title": "RAM&CPU",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Allocated bytes": "#7EB26D",
+ "Allocated bytes - 1m max": "#BF1B00",
+ "Allocated bytes - 1m min": "#BF1B00",
+ "Allocated bytes - 5m max": "#BF1B00",
+ "Allocated bytes - 5m min": "#BF1B00",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833",
+ "RSS": "#447EBC"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
"decimals": null,
"editable": true,
"error": false,
@@ -31425,151 +34496,16 @@ items:
]
},
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 7,
- "x": 17,
- "y": 19
- },
- "id": 7,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "metric": "",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster Filesystem usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "alert": {
- "alertRuleTags": {
-
- },
- "conditions": [
- {
- "evaluator": {
- "params": [
- 1
- ],
- "type": "lt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "C",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "avg"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "for": "2m",
- "frequency": "60s",
- "handler": 1,
- "name": "Node Down",
- "noDataState": "alerting",
- "notifications": [
-
- ]
- },
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 4,
- "w": 7,
- "x": 17,
- "y": 24
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 71
},
"hiddenSeries": false,
- "id": 29,
+ "id": 6,
"legend": {
"avg": false,
"current": false,
@@ -31584,51 +34520,68 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": [
-
- ]
- },
+ "nullPointMode": "null",
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
-
+ {
+ "alias": "/-/",
+ "fill": 0
+ },
+ {
+ "alias": "collector heap size",
+ "color": "#E0752D",
+ "fill": 0,
+ "linewidth": 2
+ },
+ {
+ "alias": "collector kubernetes memory limit",
+ "color": "#BF1B00",
+ "fill": 0,
+ "linewidth": 3
+ }
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)",
+ "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)",
"format": "time_series",
"hide": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "C"
+ "intervalFactor": 2,
+ "legendFormat": "Total resident memory - {{instance}}",
+ "metric": "process_resident_memory_bytes",
+ "refId": "B",
+ "step": 1800
+ },
+ {
+ "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Total allocated bytes - {{instance}}",
+ "metric": "go_memstats_alloc_bytes",
+ "refId": "A",
+ "step": 1800
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "lt",
- "value": 1
- }
+
],
"timeFrom": null,
"timeRegions": [
],
"timeShift": null,
- "title": "Up Nodes",
+ "title": "Memory",
"tooltip": {
+ "msResolution": false,
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -31643,16 +34596,14 @@ items:
},
"yaxes": [
{
- "$$hashKey": "object:14563",
- "format": "short",
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": "0",
"show": true
},
{
- "$$hashKey": "object:14564",
"format": "short",
"label": null,
"logBase": 1,
@@ -31667,50 +34618,20 @@ items:
}
},
{
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "1m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "avg"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Temperature alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
"aliasColors": {
-
+ "Allocated bytes": "#F9BA8F",
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833",
+ "RSS": "#890F02"
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
"fieldConfig": {
"defaults": {
"custom": {
@@ -31724,19 +34645,19 @@ items:
"fill": 1,
"fillGradient": 0,
"gridPos": {
- "h": 6,
- "w": 24,
- "x": 0,
- "y": 28
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 71
},
"hiddenSeries": false,
- "id": 13,
+ "id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "show": false,
"total": false,
"values": false
},
@@ -31746,12 +34667,8 @@ items:
],
"nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
"percentage": false,
+ "pluginVersion": "7.1.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -31763,31 +34680,28 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rpi_cpu_temperature_celsius",
+ "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])",
"format": "time_series",
- "intervalFactor": 5,
- "legendFormat": "{{instance}}",
- "refId": "A"
+ "intervalFactor": 2,
+ "legendFormat": "Allocated Bytes/s",
+ "metric": "go_memstats_alloc_bytes",
+ "refId": "A",
+ "step": 1800
}
],
"thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 85
- }
+
],
"timeFrom": null,
"timeRegions": [
],
"timeShift": null,
- "title": "CPU Temperature",
+ "title": "Allocations per second",
"tooltip": {
+ "msResolution": false,
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -31802,11 +34716,11 @@ items:
},
"yaxes": [
{
- "format": "celsius",
+ "format": "bytes",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": "0",
"show": true
},
{
@@ -31824,723 +34738,431 @@ items:
}
},
{
- "collapsed": false,
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 71
+ },
+ "hiddenSeries": false,
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "CPU/s",
+ "metric": "prometheus_local_storage_ingested_samples_total",
+ "refId": "B",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "CPU per second",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ "avg"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
- "y": 34
+ "y": 78
},
- "id": 17,
+ "id": 68,
"panels": [
+ {
+ "aliasColors": {
+ "Chunks": "#1F78C1",
+ "Chunks to persist": "#508642",
+ "Max chunks": "#052B51",
+ "Max to persist": "#3F6833"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 79
+ },
+ "hiddenSeries": false,
+ "id": 47,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pluginVersion": "7.1.2",
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}",
+ "metric": "prometheus_local_storage_chunk_ops_total",
+ "refId": "M",
+ "step": 1800
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeRegions": [
+
+ ],
+ "timeShift": null,
+ "title": "Net errors",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
],
- "title": "Pods",
+ "repeat": null,
+ "title": "Conntrack errors",
"type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 35
- },
- "hiddenSeries": false,
- "id": 3,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 270,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))",
- "format": "time_series",
- "instant": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod}}",
- "metric": "container_cpu",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Pod CPU usage",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 42
- },
- "hiddenSeries": false,
- "id": 2,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 250,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))",
- "format": "time_series",
- "hide": true,
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "metric": "container_memory_usage:sort_desc",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Pod memory usage",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 49
- },
- "hiddenSeries": false,
- "id": 19,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": 550,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ pod_name }}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Sent Network Traffic per Container",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": "",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": "",
- "logBase": 10,
- "max": 8,
- "min": 0,
- "show": false
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 49
- },
- "hiddenSeries": false,
- "id": 21,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": 150,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Received Network Traffic per Container",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 56
- },
- "hiddenSeries": false,
- "id": 8,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 220,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Receive Traffic",
- "metric": "network",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Transmit Traffic",
- "metric": "network",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Pod Network i/o",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "$$hashKey": "object:1163",
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "$$hashKey": "object:1164",
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
}
],
- "refresh": "10s",
- "schemaVersion": 25,
+ "refresh": "5m",
+ "schemaVersion": 26,
"style": "dark",
"tags": [
- "custom",
- "overview"
+ "custom"
],
"templating": {
"list": [
+ {
+ "auto": true,
+ "auto_count": 30,
+ "auto_min": "2m",
+ "current": {
+ "selected": false,
+ "text": "auto",
+ "value": "$__auto_interval_aggregation_interval"
+ },
+ "hide": 0,
+ "label": "aggregation interval",
+ "name": "aggregation_interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "auto",
+ "value": "$__auto_interval_aggregation_interval"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "localhost:9090",
+ "value": "localhost:9090"
+ },
+ "datasource": "$datasource",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Instance",
+ "multi": false,
+ "name": "instance",
+ "options": [
+ ],
+ "query": "label_values(prometheus_build_info, instance)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 2,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "current": {
+ "text": "60",
+ "value": "60"
+ },
+ "hide": 0,
+ "label": "Scrape interval seconds",
+ "name": "scrape_interval",
+ "options": [
+ {
+ "text": "60",
+ "value": "60"
+ }
+ ],
+ "query": "60",
+ "skipUrlSync": false,
+ "type": "constant"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "Prometheus datasource",
+ "multi": false,
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "No data sources found",
+ "value": ""
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "InfluxDB datasource",
+ "multi": false,
+ "name": "influx_datasource",
+ "options": [
+
+ ],
+ "query": "influxdb",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ }
]
},
"time": {
@@ -32549,6 +35171,7 @@ items:
},
"timepicker": {
"refresh_intervals": [
+ "5s",
"10s",
"30s",
"1m",
@@ -32572,9 +35195,9 @@ items:
]
},
"timezone": "browser",
- "title": "⭐️ Kubernetes Monitoring Overview",
- "uid": "",
- "version": 1
+ "title": "Prometheus Monitoring",
+ "uid": "XmsJC9mRz",
+ "version": 2
}
kind: ConfigMap
metadata:
@@ -36623,7 +39246,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Proxy",
- "uid": "",
+ "uid": "632e265de029684c40b21cb76bca4f94",
"version": 0
}
kind: ConfigMap
@@ -37673,7 +40296,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Scheduler",
- "uid": "",
+ "uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
"version": 0
}
kind: ConfigMap
@@ -38584,7 +41207,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / StatefulSets",
- "uid": "",
+ "uid": "a31c1f46e6f727cb37c0d731a7245005",
"version": 0
}
kind: ConfigMap
@@ -39970,7 +42593,7 @@ items:
},
"timezone": "UTC",
"title": "Kubernetes / Networking / Workload",
- "uid": "",
+ "uid": "728bf77cc1166d2f3133bf25846876cc",
"version": 0
}
kind: ConfigMap