diff --git a/base_operator_stack.jsonnet b/base_operator_stack.jsonnet index 1c4e850..1a7d640 100644 --- a/base_operator_stack.jsonnet +++ b/base_operator_stack.jsonnet @@ -82,7 +82,7 @@ local vars = import 'vars.jsonnet'; pvc.new() + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + pvc.mixin.spec.resources.withRequests({ storage: vars.enablePersistence.prometheusSizePV }) + - (if vars.enablePersistence.prometheusPV != '' then pvc.mixin.spec.withVolumeName(vars.enablePersistence.prometheusPV)), + (if vars.enablePersistence.prometheusPV != null then pvc.mixin.spec.withVolumeName(vars.enablePersistence.prometheusPV)), // Uncomment below to define a StorageClass name //+ pvc.mixin.spec.withStorageClassName('nfs-master-ssd'), }, @@ -125,7 +125,7 @@ local vars = import 'vars.jsonnet'; pvc.mixin.metadata.withName('grafana-storage') + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + pvc.mixin.spec.resources.withRequests({ storage: vars.enablePersistence.grafanaSizePV }) + - (if vars.enablePersistence.grafanaPV != '' then pvc.mixin.spec.withVolumeName(vars.enablePersistence.grafanaPV)), + (if vars.enablePersistence.grafanaPV != null then pvc.mixin.spec.withVolumeName(vars.enablePersistence.grafanaPV)), } else {}, grafanaDashboards+:: $._config.grafanaDashboards, diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 6cd7b1d..c758677 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -30421,115 +30421,56 @@ items: "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" - }, - { - "datasource": "$datasource", - "enable": true, - "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)", - "hide": false, - "iconColor": "rgb(250, 44, 18)", - "limit": 100, - "name": "downage", - "showIn": 0, - "step": "30s", - "tagKeys": "instance", - "textFormat": "prometheus down", - "titleFormat": "Downage", - "type": "alert" - }, - { - "datasource": "$datasource", - "enable": true, - "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)", - "hide": false, - "iconColor": "#fceaca", - "limit": 100, - "name": "Reload", - "showIn": 0, - "step": "5m", - "tagKeys": "instance", - "tags": [ - - ], - "titleFormat": "Reload", - "type": "tags" } ] }, - "description": "Dashboard for monitoring of Prometheus v2.x.x", + "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", "editable": true, - "gnetId": 3681, + "gnetId": 162, "graphTooltip": 1, - "id": 13, - "iteration": 1549118131383, "links": [ - { - "icon": "info", - "tags": [ - ], - "targetBlank": true, - "title": "Dashboard's Github ", - "tooltip": "Github repo of this dashboard", - "type": "link", - "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard" - }, - { - "icon": "doc", - "tags": [ - - ], - "targetBlank": true, - "title": "Prometheus Docs", - "tooltip": "", - "type": "link", - "url": "http://prometheus.io/docs/introduction/overview/" - } ], "panels": [ { - "collapsed": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, "gridPos": { - "h": 1, - "w": 24, + "h": 7, + "w": 8, "x": 0, "y": 0 }, - "id": 55, - "panels": [ - - ], - "repeat": null, - "title": "Header instance info", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#bf1b00" - ], - "datasource": "prometheus", - "decimals": 1, - "format": "s", - "gauge": { - "maxValue": 1000000, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 41, + "id": 4, "interval": null, + "isNew": true, "links": [ ], @@ -30567,15 +30508,16 @@ items: "tableColumn": "", "targets": [ { - "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})", + "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 } ], - "thresholds": "", - "title": "Uptime", + "thresholds": "65, 90", + "title": "Cluster memory usage", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -30589,398 +30531,494 @@ items: }, { "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, + "colorBackground": true, + "colorValue": false, "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#bf1b00" + "rgba(0, 0, 0, 0)", + "rgb(210, 1, 1)", + "#890f02" ], "datasource": "prometheus", - "format": "short", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "percentunit", "gauge": { - "maxValue": 1000000, + "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, + "gridPos": { + "h": 2, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 23, + "interval": null, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "{job=\"kubelet\"}", + "targets": [ + { + "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "1.1", + "title": "Up Nodes", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 6, + "interval": null, + "isNew": true, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster CPU usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ + + ], + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fontSize": "90%", "gridPos": { "h": 5, "w": 8, - "x": 4, - "y": 1 + "x": 8, + "y": 2 }, - "id": 42, - "interval": null, + "id": 25, "links": [ ], - "mappingType": 1, - "mappingTypes": [ + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": false + }, + "styles": [ { - "name": "value to text", - "value": 1 + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "500000,800000,1000000", - "title": "Total count of time series", - "type": "singlestat", - "valueFontSize": "150%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "prometheus", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 48, - "interval": null, - "links": [ + "alias": "Uptime", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + ], + "type": "number", + "unit": "s" }, { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/endpoint|job|namespace|pod|service/", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, { - "from": "null", - "text": "N/A", - "to": "null" + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "preserveFormat": false, + "sanitize": false, + "thresholds": [ + + ], + "type": "string", + "unit": "short" } ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "version", "targets": [ { - "expr": "prometheus_build_info{instance=\"$instance\"}", + "expr": "(time() - node_boot_time_seconds)", "format": "table", "instant": true, - "intervalFactor": 2, + "intervalFactor": 1, "refId": "A" } ], - "thresholds": "", - "title": "Version", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "title": "Node Uptime", + "transform": "table", + "transparent": true, + "type": "table-old" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], "datasource": "prometheus", - "decimals": 2, - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "custom": { + + }, + "mappings": [ + { + "from": "", + "id": 0, + "operator": "", + "text": "Up", + "to": "", + "type": 1, + "value": "1" + }, + { + "from": "", + "id": 1, + "operator": "", + "text": "Down", + "to": "", + "type": 1, + "value": "0" + }, + { + "from": "", + "id": 2, + "operator": "", + "text": "Down", + "to": "", + "type": 1, + "value": "0.5" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(0, 0, 0)", + "value": null + }, + { + "color": "red", + "value": 0 + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [ + + ] }, "gridPos": { - "h": 5, - "w": 4, - "x": 15, - "y": 1 + "h": 2, + "w": 24, + "x": 0, + "y": 7 }, - "id": 49, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 + "id": 35, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false }, - "tableColumn": "", + "pluginVersion": "7.0.3", + "repeat": null, "targets": [ { - "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}", + "expr": "sort(avg by (job) (up{job=~\"kube-dns|kubelet|traefik.*|apiserver|kube-controller-manager|kube-scheduler\"}))", "format": "time_series", "instant": true, - "intervalFactor": 2, + "interval": "", + "legendFormat": "{{job}}", "refId": "A" } ], - "thresholds": "", - "title": "Actual head block length", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "content": "", - "gridPos": { - "h": 5, - "w": 2, - "x": 19, - "y": 1 - }, - "height": "", - "id": 50, - "links": [ - - ], - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#e6522c", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "prometheus", - "decimals": 1, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 21, - "y": 1 - }, - "id": 52, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "10,20", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "200%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "timeFrom": null, + "timeShift": null, + "title": "Kubernetes Core Services", + "type": "stat" }, { "collapsed": false, + "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 6 + "y": 9 }, - "id": 56, + "id": 15, "panels": [ ], - "repeat": null, - "title": "Main info", + "title": "Nodes", "type": "row" }, { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0.85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "B", + "5m", + "now" + ] + }, + "reducer": { + "params": [ + + ], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0m", + "frequency": "60s", + "handler": 1, + "name": "Memory Usage alert", + "noDataState": "no_data", + "notifications": [ + + ] + }, "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 7 + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] }, - "id": 15, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 10, "legend": { - "avg": true, + "avg": false, "current": false, "max": false, "min": false, - "show": false, + "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -30988,6 +31026,11 @@ items: ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -30996,32 +31039,42 @@ items: ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)", + "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + }, + { + "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes", + "format": "time_series", + "hide": true, "intervalFactor": 1, - "legendFormat": "max duration for {{slice}}", - "metric": "prometheus_local_storage_rushed_mode", - "refId": "A", - "step": 900 + "refId": "B" } ], "thresholds": [ - + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.85 + } ], "timeFrom": null, "timeRegions": [ ], "timeShift": null, - "title": "Query elapsed time", + "title": "Memory Usage", "tooltip": { - "msResolution": false, "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -31036,11 +31089,873 @@ items: }, "yaxes": [ { - "format": "s", - "label": "", + "format": "decbytes", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 90 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "15m", + "now" + ] + }, + "reducer": { + "params": [ + + ], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Usage alert", + "noDataState": "no_data", + "notifications": [ + + ] + }, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 10 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 90 + } + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [ + + ], + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 31, + "links": [ + + ], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 15, + "desc": true + }, + "styles": [ + { + "$$hashKey": "object:5708", + "alias": "Time", + "align": "auto", + "dateFormat": "HH:mm", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "$$hashKey": "object:5709", + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/__name__|endpoint|fstype|alertstate|long|mountpoint|namespace|Value|short|pod|service|job/", + "thresholds": [ + + ], + "type": "hidden", + "unit": "short" + }, + { + "$$hashKey": "object:5710", + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + + ], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ALERTS{alertstate=\"firing\", alertname!=\"Watchdog\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Active Alerts", + "transform": "table", + "type": "table-old" + }, + { + "dashboardFilter": "", + "dashboardTags": [ + + ], + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "folderId": null, + "gridPos": { + "h": 9, + "w": 5, + "x": 12, + "y": 19 + }, + "id": 27, + "limit": 10, + "links": [ + + ], + "nameFilter": "", + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 1, + "stateFilter": [ + + ], + "title": "Alarms", + "type": "alertlist" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 19 + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [ + + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster Filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "alert": { + "alertRuleTags": { + + }, + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "5m", + "now" + ] + }, + "reducer": { + "params": [ + + ], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "2m", + "frequency": "60s", + "handler": 1, + "name": "Node Down", + "noDataState": "alerting", + "notifications": [ + + ] + }, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 7, + "x": 17, + "y": 24 + }, + "hiddenSeries": false, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [ + + ] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(up{job=\"kubelet\",metrics_path=\"/metrics\"}) BY (job)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "C" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 1 + } + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Up Nodes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "$$hashKey": "object:14563", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:14564", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [ + + ], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Temperature alert", + "noDataState": "no_data", + "notifications": [ + + ] + }, + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + + ], + "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rpi_cpu_temperature_celsius", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "CPU Temperature", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "celsius", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 17, + "panels": [ + + ], + "title": "Pods", + "type": "row" + }, + { + "aliasColors": { + + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 0, + "fillGradient": 0, + "grid": { + + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 270, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + + ], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [ + + ] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod}}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + + ], + "timeFrom": null, + "timeRegions": [ + + ], + "timeShift": null, + "title": "Pod CPU usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + + ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, "show": true }, { @@ -31059,40 +31974,64 @@ items: }, { "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" + }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": "prometheus", + "decimals": 2, "editable": true, "error": false, - "fill": 1, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 0, + "fillGradient": 0, + "grid": { + + }, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 7 + "w": 24, + "x": 0, + "y": 42 }, - "id": 17, + "hiddenSeries": false, + "id": 2, + "isNew": true, "legend": { - "avg": false, - "current": false, + "alignAsTable": true, + "avg": true, + "current": true, "max": false, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "avg", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "connected", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -31105,19 +32044,22 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "created on {{ instance }}", - "metric": "prometheus_local_storage_maintain_series_duration_seconds_count", + "hide": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_memory_usage:sort_desc", "refId": "A", - "step": 1800 + "step": 10 }, { - "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", + "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "removed on {{ instance }}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", "refId": "B" } ], @@ -31129,329 +32071,7 @@ items: ], "timeShift": null, - "title": "Head series created/deleted", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 7 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "exceeded_sample_limit on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "duplicate_timestamp on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "B", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "out_of_bounds on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "C", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "out_of_order on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "D", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "rule_evaluation_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "G", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "tsdb_compactions_failed on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "K", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "tsdb_reloads_failures on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "L", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "head_series_not_found on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "N", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "evaluator_iterations_missed on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "O", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "evaluator_iterations_skipped on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "P", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Prometheus errors", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 57, - "panels": [ - - ], - "repeat": null, - "title": "Scrape & rule duration", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "", - "editable": true, - "error": false, - "fill": 1, - "grid": { - - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 15 - }, - "id": 25, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": false, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval", - "format": "time_series", - "interval": "2m", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "metric": "", - "refId": "A", - "step": 300 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Scrape delay (counts with 1m scrape interval)", + "title": "Pod memory usage", "tooltip": { "msResolution": false, "shared": true, @@ -31470,7 +32090,8 @@ items: }, "yaxes": [ { - "format": "s", + "format": "bytes", + "label": null, "logBase": 1, "max": null, "min": null, @@ -31478,6 +32099,7 @@ items: }, { "format": "short", + "label": null, "logBase": 1, "max": null, "min": null, @@ -31491,375 +32113,64 @@ items: }, { "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" + }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": "prometheus", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, + "grid": { + + }, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 15 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Queue length", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Queue length", - "metric": "prometheus_local_storage_indexing_queue_length", - "refId": "B", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Rule evaulation duration", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, "x": 0, - "y": 22 + "y": 49 }, - "id": 58, - "panels": [ - - ], - "repeat": null, - "title": "Requests & queries", - "type": "row" - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 23 - }, - "id": 18, + "hiddenSeries": false, + "id": 19, "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ handler }} on {{ instance }}", - "metric": "", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Request count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 23 - }, - "id": 16, - "legend": { - "avg": false, + "alignAsTable": true, + "avg": true, "current": false, "hideEmpty": true, "hideZero": true, "max": false, "min": false, - "show": false, + "rightSide": false, + "show": true, + "sideWidth": 550, + "sort": "avg", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{ handler }} on {{ instance }}", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Request duration per handler", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + "options": { + "dataLinks": [ ] }, - "yaxes": [ - { - "format": "µs", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 23 - }, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -31872,134 +32183,23 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0", + "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))", "format": "time_series", - "hide": false, + "interval": "", "intervalFactor": 2, - "legendFormat": "{{ handler }} in {{ instance }}", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Request size by handler", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Allocated bytes": "#F9BA8F", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max count collector": "#bf1b00", - "Max count harvester": "#bf1b00", - "Max to persist": "#3F6833", - "RSS": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 23 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Max.*/", - "fill": 0, - "linewidth": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current count ", - "metric": "last", + "legendFormat": "{{ pod_name }}", "refId": "A", - "step": 1800 + "step": 240 }, { - "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)", + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", "format": "time_series", + "hide": true, + "interval": "", "intervalFactor": 2, - "legendFormat": "Max count", - "metric": "last", + "legendFormat": "", "refId": "B", - "step": 1800 + "step": 10 } ], "thresholds": [ @@ -32010,12 +32210,12 @@ items: ], "timeShift": null, - "title": "Cont of concurent queries", + "title": "Sent Network Traffic per Container", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 2, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -32029,1258 +32229,20 @@ items: }, "yaxes": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 59, - "panels": [ - - ], - "repeat": null, - "title": "Alerting", - "type": "row" - }, - { - "aliasColors": { - "Alert queue capacity on o collector": "#bf1b00", - "Alert queue capacity on o harvester": "#bf1b00", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 31 - }, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*capacity.*/", - "fill": 0, - "linewidth": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Alert queue capacity ", - "metric": "prometheus_local_storage_checkpoint_last_size_bytes", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Alert queue size on ", - "metric": "prometheus_local_storage_checkpoint_last_size_bytes", - "refId": "B", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Alert queue size", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 31 - }, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Checkpoint chunks written/s", - "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Count of discovered alertmanagers", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 31 - }, - "id": 39, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "notifications_dropped on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "F", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "rule_evaluation_failures on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Alerting errors", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 38 - }, - "id": 60, - "panels": [ - - ], - "repeat": null, - "title": "Service discovery", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 39 - }, - "id": 43, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Consul target sync count", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Consul SD sync count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 39 - }, - "id": 44, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Marathon target sync count", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Marathon SD sync count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 39 - }, - "id": 45, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Count of target synces", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Kubernetes SD sync count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 39 - }, - "id": 46, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "exceeded_sample_limit on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "A", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_file_read_error on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "E", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_consul_rpc_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "H", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_marathon_refresh_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "I", - "step": 1800 - }, - { - "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sd_openstack_refresh_failure on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "J", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Service discovery errors", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 46 - }, - "id": 61, - "panels": [ - - ], - "repeat": null, - "title": "TSDB stats", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 47 - }, - "id": 36, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Reloaded block from disk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 47 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Loaded data blocks", - "metric": "prometheus_local_storage_memory_chunkdescs", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Loaded data blocks", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 47 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Time series count", - "metric": "prometheus_local_storage_memory_series", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Time series total count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 47 - }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "samples/s {{instance}}", - "metric": "prometheus_local_storage_ingested_samples_total", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Samples Appended per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", + "format": "Bps", "label": "", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false } ], "yaxis": { @@ -33288,1041 +32250,66 @@ items: "alignLevel": null } }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 54 - }, - "id": 62, - "panels": [ - - ], - "repeat": null, - "title": "Head block stats", - "type": "row" - }, { "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833", - "To persist": "#9AC48A" + }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": "prometheus", "editable": true, "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 55 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ + "fieldConfig": { + "defaults": { + "custom": { - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/Max.*/", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Head chunk count", - "metric": "prometheus_local_storage_memory_chunks", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Head chunks count", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ + } + }, + "overrides": [ ] }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { + "fill": 1, + "fillGradient": 0, + "grid": { }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 55 - }, - "id": 35, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Length of head block", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 55 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "created on {{ instance }}", - "refId": "B" - }, - { - "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "deleted on {{ instance }}", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Head Chunks Created/Deleted per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 62 - }, - "id": 63, - "panels": [ - - ], - "repeat": null, - "title": "Data maintenance", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 63 - }, - "id": 33, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "B" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Compaction duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 6, - "y": 63 - }, - "id": 34, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ quantile }} on {{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Go Garbage collection duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, + "w": 12, "x": 12, - "y": 63 + "y": 49 }, - "id": 37, + "hiddenSeries": false, + "id": 21, "legend": { - "avg": false, + "alignAsTable": true, + "avg": true, "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ quantile }} on {{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "WAL truncate duration seconds", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 63 - }, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ quantile }} {{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "WAL fsync duration seconds", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 70 - }, - "id": 64, - "panels": [ - - ], - "repeat": null, - "title": "RAM&CPU", - "type": "row" - }, - { - "aliasColors": { - "Allocated bytes": "#7EB26D", - "Allocated bytes - 1m max": "#BF1B00", - "Allocated bytes - 1m min": "#BF1B00", - "Allocated bytes - 5m max": "#BF1B00", - "Allocated bytes - 5m min": "#BF1B00", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833", - "RSS": "#447EBC" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": null, - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 71 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/-/", - "fill": 0 - }, - { - "alias": "collector heap size", - "color": "#E0752D", - "fill": 0, - "linewidth": 2 - }, - { - "alias": "collector kubernetes memory limit", - "color": "#BF1B00", - "fill": 0, - "linewidth": 3 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Total resident memory - {{instance}}", - "metric": "process_resident_memory_bytes", - "refId": "B", - "step": 1800 - }, - { - "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Total llocated bytes - {{instance}}", - "metric": "go_memstats_alloc_bytes", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Allocated bytes": "#F9BA8F", - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833", - "RSS": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 71 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Allocated Bytes/s", - "metric": "go_memstats_alloc_bytes", - "refId": "A", - "step": 1800 - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Allocations per second", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 71 - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": false, - "show": false, + "show": true, + "sideWidth": 150, + "sort": "avg", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -34335,13 +32322,22 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)", + "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "CPU/s", - "metric": "prometheus_local_storage_ingested_samples_total", + "legendFormat": "{{pod_name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{pod_name}}", "refId": "B", - "step": 1800 + "step": 10 } ], "thresholds": [ @@ -34352,12 +32348,12 @@ items: ], "timeShift": null, - "title": "CPU per second", + "title": "Received Network Traffic per Container", "tooltip": { - "msResolution": false, + "msResolution": true, "shared": true, "sort": 2, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -34366,16 +32362,16 @@ items: "name": null, "show": true, "values": [ - "avg" + ] }, "yaxes": [ { - "format": "none", + "format": "Bps", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -34392,1489 +32388,66 @@ items: "alignLevel": null } }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 78 - }, - "id": 65, - "panels": [ - - ], - "repeat": null, - "title": "Heapster description", - "type": "row" - }, - { - "content": "Two rows bellow can serve as example if you are running **Prometheus** in **Kubernetes** and uses **Heapster** with **InfluxDB**.\n\nThe schema and queries are very hard to generalize so you will have to tweak them but I leave them here for inspiration.", - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 79 - }, - "id": 51, - "links": [ - - ], - "mode": "markdown", - "title": "Heapster rows", - "type": "text" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 82 - }, - "id": 66, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$influx_datasource", - "decimals": 2, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 91 - }, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cpu/limit.mean ", - "color": "#C15C17", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\")/1000 as \"CPU LIMIT\" FROM \"cpu/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage/s", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$influx_datasource", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 91 - }, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cpu/limit.mean ", - "color": "#C15C17", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"CPU LIMIT\" FROM \"memory/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "title": "Heapster RAM&CPU", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 83 - }, - "id": 67, - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$influx_datasource", - "fill": 1, - "gridPos": { - "h": 7, - "w": 10, - "x": 0, - "y": 92 - }, - "id": 30, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/errors/", - "color": "#C15C17", - "pointradius": 3, - "points": true - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_rate.mean \" FROM \"network/rx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_rate.mean\" FROM \"network/rx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean \" FROM \"network/tx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean\" FROM \"network/tx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "D", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "E", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "F", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "G", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", - "rawQuery": true, - "refId": "H", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network rx[IN] / tx[OUT] in bytes/s", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$influx_datasource", - "fill": 1, - "gridPos": { - "h": 7, - "w": 10, - "x": 10, - "y": 92 - }, - "id": 31, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cpu/limit.mean", - "color": "#C15C17", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean \" FROM \"filesystem/limit\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean\" FROM \"filesystem/limit\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "D", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk usage", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$influx_datasource", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 20, - "y": 92 - }, - "id": 32, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "cpu/limit.mean ", - "color": "#C15C17", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"FREE_INODES.mean \" FROM \"filesystem/inodes_free\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "hide": false, - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"FREE_INODES.mean\" FROM \"filesystem/inodes_free\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"USED_INODES.mean \" FROM \"filesystem/inodes\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - }, - { - "dsType": "influxdb", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT mean(\"value\") as \"USED_INODES.mean\" FROM \"filesystem/inodes\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", - "rawQuery": true, - "refId": "D", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [ - - ], - "type": "mean" - } - ] - ], - "tags": [ - - ] - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Number of free INODES", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "title": "Heapster host stats", - "type": "row" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 84 - }, - "id": 68, - "panels": [ - - ], - "repeat": null, - "title": "Contrac errors", - "type": "row" - }, { "aliasColors": { - "Chunks": "#1F78C1", - "Chunks to persist": "#508642", - "Max chunks": "#052B51", - "Max to persist": "#3F6833" + }, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": "prometheus", + "decimals": 2, "editable": true, "error": false, - "fill": 1, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, + "fill": 0, + "fillGradient": 0, + "grid": { + + }, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 85 + "y": 56 }, - "id": 47, + "hiddenSeries": false, + "id": 8, + "isNew": true, "legend": { - "avg": false, - "current": false, + "alignAsTable": true, + "avg": true, + "current": true, "max": false, "min": false, - "show": false, + "rightSide": true, + "show": true, + "sideWidth": 220, + "sort": "current", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [ ], - "nullPointMode": "null", + "nullPointMode": "connected", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -35887,15 +32460,24 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0", + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}", - "metric": "prometheus_local_storage_chunk_ops_total", - "refId": "M", - "step": 1800 + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Receive Traffic", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Transmit Traffic", + "metric": "network", + "refId": "B", + "step": 10 } ], "thresholds": [ @@ -35906,12 +32488,12 @@ items: ], "timeShift": null, - "title": "Net errors", + "title": "Pod Network i/o", "tooltip": { "msResolution": false, "shared": true, - "sort": 2, - "value_type": "individual" + "sort": 0, + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -35925,14 +32507,16 @@ items: }, "yaxes": [ { - "format": "short", + "$$hashKey": "object:1163", + "format": "bytes", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { + "$$hashKey": "object:1164", "format": "short", "label": null, "logBase": 1, @@ -35947,168 +32531,16 @@ items: } } ], - "refresh": "5m", - "schemaVersion": 16, + "refresh": "10s", + "schemaVersion": 25, "style": "dark", "tags": [ - "custom" + "custom", + "overview" ], "templating": { "list": [ - { - "auto": true, - "auto_count": 30, - "auto_min": "2m", - "current": { - "text": "auto", - "value": "$__auto_interval_aggregation_interval" - }, - "hide": 0, - "label": "aggregation intarval", - "name": "aggregation_interval", - "options": [ - { - "selected": true, - "text": "auto", - "value": "$__auto_interval_aggregation_interval" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - } - ], - "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", - "refresh": 2, - "skipUrlSync": false, - "type": "interval" - }, - { - "allValue": null, - "current": { - "text": "10.32.0.53:9090", - "value": "10.32.0.53:9090" - }, - "datasource": "$datasource", - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Instance", - "multi": false, - "name": "instance", - "options": [ - ], - "query": "label_values(prometheus_build_info, instance)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 2, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "text": "60", - "value": "60" - }, - "hide": 0, - "label": "Scrape interval seconds", - "name": "scrape_interval", - "options": [ - { - "text": "60", - "value": "60" - } - ], - "query": "60", - "skipUrlSync": false, - "type": "constant" - }, - { - "current": { - "text": "prometheus", - "value": "prometheus" - }, - "hide": 0, - "label": "Prometheus datasource", - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": { - "text": "No data sources found", - "value": "" - }, - "hide": 0, - "label": "InfluxDB datasource", - "name": "influx_datasource", - "options": [ - - ], - "query": "influxdb", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - } ] }, "time": { @@ -36117,7 +32549,6 @@ items: }, "timepicker": { "refresh_intervals": [ - "5s", "10s", "30s", "1m", @@ -36141,8 +32572,8 @@ items: ] }, "timezone": "browser", - "title": "Prometheus2.0", - "uid": "XmsJC9mRz", + "title": "⭐️ Kubernetes Monitoring Overview", + "uid": "82pBZCmRkasd", "version": 1 } kind: ConfigMap