diff --git a/grafana-dashboards/kubernetes-cluster-dashboard.json b/grafana-dashboards/kubernetes-cluster-dashboard.json index 9f42016..a441b39 100644 --- a/grafana-dashboards/kubernetes-cluster-dashboard.json +++ b/grafana-dashboards/kubernetes-cluster-dashboard.json @@ -1,1680 +1,1761 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", - "editable": true, - "gnetId": 162, - "graphTooltip": 1, - "id": 7, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 0 - }, - "id": 4, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / 
sum(node_memory_MemTotal_bytes) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster memory usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(0, 0, 0, 0)", - "rgb(210, 1, 1)", - "#890f02" - ], - "datasource": "prometheus", - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 8, - "x": 8, - "y": 0 - }, - "id": 23, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(up{job=\"kubelet\"}) BY (job)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "1.1", - "title": "Up Nodes", - "type": "singlestat", - "valueFontSize": "120%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, 
- "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 6, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster CPU usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "columns": [], - "datasource": "prometheus", - "fontSize": "90%", - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 25, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 2, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "colorMode": 
null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/endpoint|job|namespace|pod|service/", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "preserveFormat": false, - "sanitize": false, - "thresholds": [], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(time() - node_boot_time_seconds)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Node Uptime", - "transform": "table", - "transparent": true, - "type": "table" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 15, - "panels": [], - "title": "Nodes", - "type": "row" - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0.85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "B", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "0m", - "frequency": "60s", - "handler": 1, - "name": "Memory Usage alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, 
- "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" - }, - { - "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "refId": "B" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0.85 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 90 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "15m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Usage alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 8 - }, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": 
true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", - "format": "time_series", - "intervalFactor": 3, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 90 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "percent", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "columns": [], - "datasource": "prometheus", - "fontSize": "100%", - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 31, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "condition|container|daemonset|endpoint|namespace|node", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - 
"rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "ALERTS{alertstate=\"firing\"}", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - }, - { - "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}", - "format": "table", - "hide": true, - "intervalFactor": 1, - "refId": "B" - } - ], - "title": "Active Alerts", - "transform": "table", - "type": "table" - }, - { - "dashboardFilter": "", - "dashboardTags": [], - "folderId": null, - "gridPos": { - "h": 9, - "w": 5, - "x": 12, - "y": 17 - }, - "id": 27, - "limit": 10, - "links": [], - "nameFilter": "", - "onlyAlertsOnDashboard": false, - "show": "current", - "sortOrder": 1, - "stateFilter": [], - "title": "Alarms", - "type": "alertlist" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "decimals": null, - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 7, - "x": 17, - "y": 17 - }, - "id": 7, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - 
"targets": [ - { - "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster Filesystem usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1 - ], - "type": "lt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "C", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Node Down", - "noDataState": "alerting", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 4, - "w": 7, - "x": 17, - "y": 22 - }, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(up{job=\"kubelet\"}) BY (job)", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Up Nodes", - "refId": "A" - }, - { - "expr": "count(up{job=\"kubelet\"})", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Total Nodes", - "refId": "B" - }, - { - "expr": "avg(up{job=\"kubelet\"}) BY 
(job)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "C" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "lt", - "value": 1 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Up Nodes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Temperature alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rpi_cpu_temperature_celsius", - "format": "time_series", - "intervalFactor": 5, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - 
"thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Temperature", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "celsius", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 17, - "panels": [], - "title": "Pods", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 3, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 270, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1m] ) ))", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ container_name}}", - "metric": "container_cpu", - 
"refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pod CPU usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 250, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (container_name, image))", - "format": "time_series", - "hide": true, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ container_name }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - }, - { - "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (container_name))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container_name }}", - "refId": "B" - } - ], - "thresholds": [], - 
"timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pod memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 47 - }, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 550, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[5m])) by (pod_name))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A", - "step": 240 - }, - { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sent Network Traffic per Container", - "tooltip": { - 
"msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "transparent": false, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 47 - }, - "id": 21, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 150, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod_name=~\".+\"}[5m])) by (pod_name))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{pod_name}}", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Received Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": 
"cumulative" - }, - "transparent": false, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 54 - }, - "id": 8, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 200, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Receive Traffic", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Transmit Traffic", - "metric": "network", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Pod Network i/o", - 
"tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "10s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "custom" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" + "annotations": { + "list": [{ + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }] + }, + "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. 
", + "editable": true, + "gnetId": 162, + "graphTooltip": 1, + "links": [], + "panels": [{ + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] + "datasource": "prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [{ + "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "65, 90", + "title": "Cluster memory usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" }, - "timezone": "browser", - "title": "Kubernetes cluster monitoring (via Prometheus)", - "uid": "82pBZCmRkasd", - "version": 1 - } \ No newline at end of file + { + 
"cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(0, 0, 0, 0)", + "rgb(210, 1, 1)", + "#890f02" + ], + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "{job=\"kubelet\"}", + "targets": [{ + "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }], + "thresholds": "1.1", + "title": "Up Nodes", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 6, + 
"interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [{ + "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "65, 90", + "title": "Cluster CPU usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, + { + "columns": [], + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "90%", + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 25, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": false + }, + "styles": [{ + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "Uptime", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + 
], + "decimals": 2, + "pattern": "/endpoint|job|namespace|pod|service/", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [{ + "expr": "(time() - node_boot_time_seconds)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + }], + "title": "Node Uptime", + "transform": "table", + "transparent": true, + "type": "table-old" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 15, + "panels": [], + "title": "Nodes", + "type": "row" + }, + { + "alert": { + "conditions": [{ + "evaluator": { + "params": [ + 0.85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "B", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + }], + "executionErrorState": "alerting", + "for": "0m", + "frequency": "60s", + "handler": 1, + "name": "Memory Usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, 
+ "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + }, + { + "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "thresholds": [{ + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.85 + }], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [{ + "evaluator": { + "params": [ + 90 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "15m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + }], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 8 
+ }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "{{instance}}", + "refId": "A" + }], + "thresholds": [{ + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 90 + }], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "decimals": null, + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 31, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [{ + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 
54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "condition|container|daemonset|endpoint|namespace|node", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [{ + "expr": "ALERTS{alertstate=\"firing\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + }, + { + "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}", + "format": "table", + "hide": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "title": "Active Alerts", + "transform": "table", + "type": "table-old" + }, + { + "dashboardFilter": "", + "dashboardTags": [], + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "folderId": null, + "gridPos": { + "h": 9, + "w": 5, + "x": 12, + "y": 17 + }, + "id": 27, + "limit": 10, + "links": [], + "nameFilter": "", + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 1, + "stateFilter": [], + "title": "Alarms", + "type": "alertlist" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 17 + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + 
"mappingTypes": [{ + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [{ + "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 10 + }], + "thresholds": "65, 90", + "title": "Cluster Filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, + { + "alert": { + "conditions": [{ + "evaluator": { + "params": [ + 1 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + }], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Node Down", + "noDataState": "alerting", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 4, + "w": 7, + "x": 17, + "y": 22 + }, + "hiddenSeries": false, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": 
"null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sum(up{job=\"kubelet\"}) BY (job)", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Up Nodes", + "refId": "A" + }, + { + "expr": "count(up{job=\"kubelet\"})", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Total Nodes", + "refId": "B" + }, + { + "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "C" + } + ], + "thresholds": [{ + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 1 + }], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Up Nodes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [{ + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + }], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Temperature alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fieldConfig": { + 
"defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "rpi_cpu_temperature_celsius", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{instance}}", + "refId": "A" + }], + "thresholds": [{ + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + }], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Temperature", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "celsius", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 17, + "panels": [], + "title": "Pods", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 33 
+ }, + "hiddenSeries": false, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 270, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod}}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + }], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod CPU usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 2, + "isNew": true, + "legend": { + "alignAsTable": 
true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))", + "format": "time_series", + "hide": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 
19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 550, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sent Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, 
+ "x": 12, + "y": 47 + }, + "hiddenSeries": false, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 150, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod_name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{pod_name}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Received Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] 
+ }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 54 + }, + "hiddenSeries": false, + "id": 8, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 220, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Receive Traffic", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Transmit Traffic", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod Network i/o", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [{ + "$$hashKey": "object:1163", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1164", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + 
"align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 25, + "style": "dark", + "tags": [ + "custom" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kubernetes cluster monitoring (via Prometheus)", + "uid": "82pBZCmRkasd", + "version": 1 +} \ No newline at end of file diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index ea6224f..2d0a140 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -20576,7 +20576,6 @@ items: "editable": true, "gnetId": 162, "graphTooltip": 1, - "id": 7, "links": [ ], @@ -20593,6 +20592,16 @@ items: "datasource": "prometheus", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "format": "percent", "gauge": { "maxValue": 100, @@ -20678,6 +20687,16 @@ items: "#890f02" ], "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "format": "percentunit", "gauge": { "maxValue": 100, @@ -20728,7 +20747,7 @@ items: "lineColor": "rgb(31, 120, 193)", "show": false }, - "tableColumn": "", + "tableColumn": "{job=\"kubelet\"}", "targets": [ { "expr": "avg(up{job=\"kubelet\"}) BY (job)", @@ -20765,6 +20784,16 @@ items: "decimals": 0, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "format": "percent", "gauge": { "maxValue": 100, @@ -20845,6 +20874,16 @@ items: ], "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fontSize": "90%", 
"gridPos": { "h": 5, @@ -20866,6 +20905,7 @@ items: "styles": [ { "alias": "Time", + "align": "auto", "dateFormat": "YYYY-MM-DD HH:mm:ss", "link": false, "pattern": "Time", @@ -20873,6 +20913,7 @@ items: }, { "alias": "Uptime", + "align": "auto", "colorMode": null, "colors": [ "rgba(50, 172, 45, 0.97)", @@ -20890,6 +20931,7 @@ items: }, { "alias": "", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", @@ -20906,6 +20948,7 @@ items: }, { "alias": "", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", @@ -20936,10 +20979,11 @@ items: "title": "Node Uptime", "transform": "table", "transparent": true, - "type": "table" + "type": "table-old" }, { "collapsed": false, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -20999,13 +21043,25 @@ items: "dashLength": 10, "dashes": false, "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 8 }, + "hiddenSeries": false, "id": 10, "legend": { "avg": false, @@ -21022,6 +21078,11 @@ items: ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -21146,13 +21207,25 @@ items: "dashLength": 10, "dashes": false, "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 8 }, + "hiddenSeries": false, "id": 11, "legend": { "avg": false, @@ -21169,6 +21242,11 @@ items: ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -21247,6 +21325,16 @@ items: ], "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fontSize": "100%", "gridPos": { "h": 9, @@ -21268,6 +21356,7 @@ items: "styles": [ { "alias": "Time", + 
"align": "auto", "dateFormat": "YYYY-MM-DD HH:mm:ss", "link": false, "pattern": "Time", @@ -21275,6 +21364,7 @@ items: }, { "alias": "", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", @@ -21292,6 +21382,7 @@ items: }, { "alias": "", + "align": "auto", "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", @@ -21325,13 +21416,24 @@ items: ], "title": "Active Alerts", "transform": "table", - "type": "table" + "type": "table-old" }, { "dashboardFilter": "", "dashboardTags": [ ], + "datasource": null, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "folderId": null, "gridPos": { "h": 9, @@ -21367,6 +21469,16 @@ items: "decimals": null, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "format": "percent", "gauge": { "maxValue": 100, @@ -21488,13 +21600,25 @@ items: "dashLength": 10, "dashes": false, "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 4, "w": 7, "x": 17, "y": 22 }, + "hiddenSeries": false, "id": 29, "legend": { "avg": false, @@ -21511,6 +21635,11 @@ items: ], "nullPointMode": "null as zero", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -21646,13 +21775,25 @@ items: "dashLength": 10, "dashes": false, "datasource": "prometheus", + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 26 }, + "hiddenSeries": false, "id": 13, "legend": { "avg": false, @@ -21669,6 +21810,11 @@ items: ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -21743,6 +21889,7 @@ items: }, { "collapsed": false, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -21767,7 +21914,18 @@ items: 
"decimals": 0, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 0, + "fillGradient": 0, "grid": { }, @@ -21777,6 +21935,7 @@ items: "x": 0, "y": 33 }, + "hiddenSeries": false, "id": 3, "isNew": true, "legend": { @@ -21801,6 +21960,11 @@ items: ], "nullPointMode": "null as zero", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -21813,12 +21977,12 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1m] ) ))", + "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))", "format": "time_series", "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ container_name}}", + "legendFormat": "{{ pod}}", "metric": "container_cpu", "refId": "A", "step": 10 @@ -21883,7 +22047,18 @@ items: "decimals": 2, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 0, + "fillGradient": 0, "grid": { }, @@ -21893,6 +22068,7 @@ items: "x": 0, "y": 40 }, + "hiddenSeries": false, "id": 2, "isNew": true, "legend": { @@ -21915,6 +22091,11 @@ items: ], "nullPointMode": "connected", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -21927,21 +22108,22 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (container_name, image))", + "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))", "format": "time_series", "hide": true, "interval": "10s", "intervalFactor": 1, - "legendFormat": "{{ container_name }}", + "legendFormat": "{{ pod }}", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 }, { - "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by 
(container_name))", + "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))", "format": "time_series", + "interval": "", "intervalFactor": 1, - "legendFormat": "{{ container_name }}", + "legendFormat": "{{ pod }}", "refId": "B" } ], @@ -22003,7 +22185,18 @@ items: "datasource": "prometheus", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, "grid": { }, @@ -22013,6 +22206,7 @@ items: "x": 0, "y": 47 }, + "hiddenSeries": false, "id": 19, "legend": { "alignAsTable": true, @@ -22036,6 +22230,11 @@ items: ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -22048,8 +22247,9 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[5m])) by (pod_name))", + "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", + "legendFormat": "{{ pod }}", "refId": "A", @@ -22059,6 +22259,7 @@ items: "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", "format": "time_series", "hide": true, + "interval": "", "intervalFactor": 2, "legendFormat": "", "refId": "B", @@ -22080,7 +22281,6 @@ items: "sort": 2, "value_type": "cumulative" }, - "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -22124,7 +22324,18 @@ items: "datasource": "prometheus", "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 1, + "fillGradient": 0, "grid": { }, @@ -22134,6 +22345,7 @@ items: "x": 12, "y": 47 }, + "hiddenSeries": false, "id": 21, "legend": { "alignAsTable": true, @@ -22157,6 +22369,11 @@ items: ], "nullPointMode": "null", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points":
false, @@ -22169,8 +22386,9 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod_name=~\".+\"}[5m])) by (pod_name))", + "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{pod_name}}", + "legendFormat": "{{pod}}", "refId": "A", @@ -22201,7 +22419,6 @@ items: "sort": 2, "value_type": "cumulative" }, - "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -22246,7 +22463,18 @@ items: "decimals": 2, "editable": true, "error": false, + "fieldConfig": { + "defaults": { + "custom": { + + } + }, + "overrides": [ + + ] + }, "fill": 0, + "fillGradient": 0, "grid": { }, @@ -22256,6 +22484,7 @@ items: "x": 0, "y": 54 }, + "hiddenSeries": false, "id": 8, "isNew": true, "legend": { @@ -22266,7 +22495,7 @@ items: "min": false, "rightSide": true, "show": true, - "sideWidth": 200, + "sideWidth": 220, "sort": "current", "sortDesc": true, "total": false, @@ -22278,6 +22507,11 @@ items: ], "nullPointMode": "connected", + "options": { + "dataLinks": [ + + ] + }, "percentage": false, "pointradius": 5, "points": false, @@ -22337,6 +22571,7 @@ items: }, "yaxes": [ { + "$$hashKey": "object:1163", "format": "bytes", "label": null, "logBase": 1, @@ -22345,6 +22580,7 @@ items: "show": true }, { + "$$hashKey": "object:1164", "format": "short", "label": null, "logBase": 1, @@ -22360,7 +22596,7 @@ items: } ], "refresh": "10s", - "schemaVersion": 16, + "schemaVersion": 25, "style": "dark", "tags": [ "custom"