mirror of https://github.com/carlosedp/cluster-monitoring.git
synced 2024-11-25 19:05:44 +01:00
1671 lines
40 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": "-- Grafana --",
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics.",
|
|
"editable": true,
|
|
"gnetId": 162,
|
|
"graphTooltip": 1,
|
|
"id": 12,
|
|
"links": [],
|
|
"panels": [
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "percent",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": true,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 4,
|
|
"interval": null,
|
|
"isNew": true,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "65, 90",
|
|
"title": "Cluster memory usage",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": true,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(0, 0, 0, 0)",
|
|
"rgb(210, 1, 1)",
|
|
"#890f02"
|
|
],
|
|
"datasource": "prometheus",
|
|
"format": "percentunit",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": false,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 0
|
|
},
|
|
"id": 23,
|
|
"interval": null,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "avg(up{job=\"kubelet\"}) BY (job)",
|
|
"format": "time_series",
|
|
"instant": true,
|
|
"intervalFactor": 1,
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": "1.1",
|
|
"title": "Up Nodes",
|
|
"type": "singlestat",
|
|
"valueFontSize": "120%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "avg"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": 0,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "percent",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": true,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 0
|
|
},
|
|
"id": 6,
|
|
"interval": null,
|
|
"isNew": true,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "65, 90",
|
|
"title": "Cluster CPU usage",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"columns": [],
|
|
"datasource": "prometheus",
|
|
"fontSize": "90%",
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 2
|
|
},
|
|
"id": 25,
|
|
"links": [],
|
|
"pageSize": null,
|
|
"scroll": true,
|
|
"showHeader": true,
|
|
"sort": {
|
|
"col": 2,
|
|
"desc": false
|
|
},
|
|
"styles": [
|
|
{
|
|
"alias": "Time",
|
|
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
|
"link": false,
|
|
"pattern": "Time",
|
|
"type": "date"
|
|
},
|
|
{
|
|
"alias": "Uptime",
|
|
"colorMode": null,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
|
"decimals": 2,
|
|
"pattern": "Value",
|
|
"thresholds": [],
|
|
"type": "number",
|
|
"unit": "s"
|
|
},
|
|
{
|
|
"alias": "",
|
|
"colorMode": null,
|
|
"colors": [
|
|
"rgba(245, 54, 54, 0.9)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(50, 172, 45, 0.97)"
|
|
],
|
|
"decimals": 2,
|
|
"pattern": "/endpoint|job|namespace|pod|service/",
|
|
"thresholds": [],
|
|
"type": "hidden",
|
|
"unit": "short"
|
|
},
|
|
{
|
|
"alias": "",
|
|
"colorMode": null,
|
|
"colors": [
|
|
"rgba(245, 54, 54, 0.9)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(50, 172, 45, 0.97)"
|
|
],
|
|
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
|
"decimals": 2,
|
|
"pattern": "instance",
|
|
"preserveFormat": false,
|
|
"sanitize": false,
|
|
"thresholds": [],
|
|
"type": "string",
|
|
"unit": "short"
|
|
}
|
|
],
|
|
"targets": [
|
|
{
|
|
"expr": "(time() - node_boot_time_seconds)",
|
|
"format": "table",
|
|
"instant": true,
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Node Uptime",
|
|
"transform": "table",
|
|
"transparent": true,
|
|
"type": "table"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 7
|
|
},
|
|
"id": 15,
|
|
"panels": [],
|
|
"title": "Nodes",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"alert": {
|
|
"conditions": [
|
|
{
|
|
"evaluator": {
|
|
"params": [
|
|
3500000000
|
|
],
|
|
"type": "gt"
|
|
},
|
|
"operator": {
|
|
"type": "and"
|
|
},
|
|
"query": {
|
|
"params": [
|
|
"A",
|
|
"4m",
|
|
"now"
|
|
]
|
|
},
|
|
"reducer": {
|
|
"params": [],
|
|
"type": "avg"
|
|
},
|
|
"type": "query"
|
|
}
|
|
],
|
|
"executionErrorState": "alerting",
|
|
"frequency": "60s",
|
|
"handler": 1,
|
|
"name": "Memory Usage alert",
|
|
"noDataState": "no_data",
|
|
"notifications": []
|
|
},
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"fill": 1,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"id": 10,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)",
|
|
"format": "time_series",
|
|
"intervalFactor": 2,
|
|
"legendFormat": "{{ instance }}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 3500000000
|
|
}
|
|
],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Memory Usage",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "decbytes",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"alert": {
|
|
"conditions": [
|
|
{
|
|
"evaluator": {
|
|
"params": [
|
|
90
|
|
],
|
|
"type": "gt"
|
|
},
|
|
"operator": {
|
|
"type": "and"
|
|
},
|
|
"query": {
|
|
"params": [
|
|
"A",
|
|
"15m",
|
|
"now"
|
|
]
|
|
},
|
|
"reducer": {
|
|
"params": [],
|
|
"type": "max"
|
|
},
|
|
"type": "query"
|
|
}
|
|
],
|
|
"executionErrorState": "alerting",
|
|
"frequency": "60s",
|
|
"handler": 1,
|
|
"name": "CPU Usage alert",
|
|
"noDataState": "no_data",
|
|
"notifications": []
|
|
},
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"fill": 1,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"id": 11,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)",
|
|
"format": "time_series",
|
|
"intervalFactor": 3,
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 90
|
|
}
|
|
],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "CPU Usage",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"decimals": null,
|
|
"format": "percent",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"columns": [],
|
|
"datasource": "prometheus",
|
|
"fontSize": "100%",
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 17
|
|
},
|
|
"id": 31,
|
|
"links": [],
|
|
"pageSize": null,
|
|
"scroll": true,
|
|
"showHeader": true,
|
|
"sort": {
|
|
"col": 0,
|
|
"desc": true
|
|
},
|
|
"styles": [
|
|
{
|
|
"alias": "Time",
|
|
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
|
"link": false,
|
|
"pattern": "Time",
|
|
"type": "date"
|
|
},
|
|
{
|
|
"alias": "",
|
|
"colorMode": null,
|
|
"colors": [
|
|
"rgba(245, 54, 54, 0.9)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(50, 172, 45, 0.97)"
|
|
],
|
|
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
|
"decimals": 2,
|
|
"pattern": "condition|container|daemonset|endpoint|namespace|node",
|
|
"thresholds": [],
|
|
"type": "hidden",
|
|
"unit": "short"
|
|
},
|
|
{
|
|
"alias": "",
|
|
"colorMode": null,
|
|
"colors": [
|
|
"rgba(245, 54, 54, 0.9)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(50, 172, 45, 0.97)"
|
|
],
|
|
"decimals": 2,
|
|
"pattern": "/.*/",
|
|
"thresholds": [],
|
|
"type": "number",
|
|
"unit": "short"
|
|
}
|
|
],
|
|
"targets": [
|
|
{
|
|
"expr": "ALERTS{alertstate=\"firing\"}",
|
|
"format": "table",
|
|
"instant": true,
|
|
"intervalFactor": 1,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}",
|
|
"format": "table",
|
|
"hide": true,
|
|
"intervalFactor": 1,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Active Alerts",
|
|
"transform": "table",
|
|
"type": "table"
|
|
},
|
|
{
|
|
"dashboardFilter": "",
|
|
"folderId": null,
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 5,
|
|
"x": 12,
|
|
"y": 17
|
|
},
|
|
"id": 27,
|
|
"limit": 10,
|
|
"links": [],
|
|
"nameFilter": "",
|
|
"onlyAlertsOnDashboard": false,
|
|
"show": "current",
|
|
"sortOrder": 1,
|
|
"stateFilter": [],
|
|
"title": "Alarms",
|
|
"type": "alertlist"
|
|
},
|
|
{
|
|
"cacheTimeout": null,
|
|
"colorBackground": false,
|
|
"colorValue": false,
|
|
"colors": [
|
|
"rgba(50, 172, 45, 0.97)",
|
|
"rgba(237, 129, 40, 0.89)",
|
|
"rgba(245, 54, 54, 0.9)"
|
|
],
|
|
"datasource": "prometheus",
|
|
"decimals": null,
|
|
"editable": true,
|
|
"error": false,
|
|
"format": "percent",
|
|
"gauge": {
|
|
"maxValue": 100,
|
|
"minValue": 0,
|
|
"show": true,
|
|
"thresholdLabels": false,
|
|
"thresholdMarkers": true
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 7,
|
|
"x": 17,
|
|
"y": 17
|
|
},
|
|
"id": 7,
|
|
"interval": null,
|
|
"isNew": true,
|
|
"links": [],
|
|
"mappingType": 1,
|
|
"mappingTypes": [
|
|
{
|
|
"name": "value to text",
|
|
"value": 1
|
|
},
|
|
{
|
|
"name": "range to text",
|
|
"value": 2
|
|
}
|
|
],
|
|
"maxDataPoints": 100,
|
|
"nullPointMode": "connected",
|
|
"nullText": null,
|
|
"postfix": "",
|
|
"postfixFontSize": "50%",
|
|
"prefix": "",
|
|
"prefixFontSize": "50%",
|
|
"rangeMaps": [
|
|
{
|
|
"from": "null",
|
|
"text": "N/A",
|
|
"to": "null"
|
|
}
|
|
],
|
|
"sparkline": {
|
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
|
"full": false,
|
|
"lineColor": "rgb(31, 120, 193)",
|
|
"show": false
|
|
},
|
|
"tableColumn": "",
|
|
"targets": [
|
|
{
|
|
"expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"metric": "",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": "65, 90",
|
|
"title": "Cluster Filesystem usage",
|
|
"type": "singlestat",
|
|
"valueFontSize": "80%",
|
|
"valueMaps": [
|
|
{
|
|
"op": "=",
|
|
"text": "N/A",
|
|
"value": "null"
|
|
}
|
|
],
|
|
"valueName": "current"
|
|
},
|
|
{
|
|
"alert": {
|
|
"conditions": [
|
|
{
|
|
"evaluator": {
|
|
"params": [
|
|
1
|
|
],
|
|
"type": "lt"
|
|
},
|
|
"operator": {
|
|
"type": "and"
|
|
},
|
|
"query": {
|
|
"params": [
|
|
"C",
|
|
"5m",
|
|
"now"
|
|
]
|
|
},
|
|
"reducer": {
|
|
"params": [],
|
|
"type": "avg"
|
|
},
|
|
"type": "query"
|
|
}
|
|
],
|
|
"executionErrorState": "alerting",
|
|
"frequency": "60s",
|
|
"handler": 1,
|
|
"name": "Node Down",
|
|
"noDataState": "alerting",
|
|
"notifications": []
|
|
},
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"fill": 1,
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 7,
|
|
"x": 17,
|
|
"y": 22
|
|
},
|
|
"id": 29,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null as zero",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sum(up{job=\"kubelet\"}) BY (job)",
|
|
"format": "time_series",
|
|
"hide": true,
|
|
"instant": false,
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Up Nodes",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"expr": "count(up{job=\"kubelet\"})",
|
|
"format": "time_series",
|
|
"hide": true,
|
|
"instant": false,
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Total Nodes",
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"expr": "avg(up{job=\"kubelet\"}) BY (job)",
|
|
"format": "time_series",
|
|
"hide": false,
|
|
"intervalFactor": 1,
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "lt",
|
|
"value": 1
|
|
}
|
|
],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Up Nodes",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"alert": {
|
|
"conditions": [
|
|
{
|
|
"evaluator": {
|
|
"params": [
|
|
85
|
|
],
|
|
"type": "gt"
|
|
},
|
|
"operator": {
|
|
"type": "and"
|
|
},
|
|
"query": {
|
|
"params": [
|
|
"A",
|
|
"1m",
|
|
"now"
|
|
]
|
|
},
|
|
"reducer": {
|
|
"params": [],
|
|
"type": "avg"
|
|
},
|
|
"type": "query"
|
|
}
|
|
],
|
|
"executionErrorState": "alerting",
|
|
"frequency": "60s",
|
|
"handler": 1,
|
|
"name": "CPU Temperature alert",
|
|
"noDataState": "no_data",
|
|
"notifications": []
|
|
},
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"fill": 1,
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 26
|
|
},
|
|
"id": 13,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": true,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "rpi_cpu_temperature_celsius",
|
|
"format": "time_series",
|
|
"intervalFactor": 5,
|
|
"legendFormat": "{{instance}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 85
|
|
}
|
|
],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "CPU Temperature",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "celsius",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 32
|
|
},
|
|
"id": 17,
|
|
"panels": [],
|
|
"title": "Pods",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 0,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 33
|
|
},
|
|
"id": 3,
|
|
"isNew": true,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"hideEmpty": true,
|
|
"hideZero": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 270,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "null as zero",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "topk(10,sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1m] ) ))",
|
|
"format": "time_series",
|
|
"instant": false,
|
|
"interval": "",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ container_name}}",
|
|
"metric": "container_cpu",
|
|
"refId": "A",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Pod CPU usage",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "percentunit",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 40
|
|
},
|
|
"id": 2,
|
|
"isNew": true,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 250,
|
|
"sort": "avg",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (container_name, image))",
|
|
"format": "time_series",
|
|
"hide": true,
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ container_name }}",
|
|
"metric": "container_memory_usage:sort_desc",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (container_name))",
|
|
"format": "time_series",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "{{ container_name }}",
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Pod memory usage",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 2,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "bytes",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 1,
|
|
"grid": {},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 47
|
|
},
|
|
"id": 19,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": false,
|
|
"hideEmpty": true,
|
|
"hideZero": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": 550,
|
|
"sort": "avg",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "topk(10,sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])) by (name))",
|
|
"format": "time_series",
|
|
"intervalFactor": 2,
|
|
"legendFormat": "{{ name }}",
|
|
"refId": "A",
|
|
"step": 240
|
|
},
|
|
{
|
|
"expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[5m])",
|
|
"format": "time_series",
|
|
"hide": true,
|
|
"intervalFactor": 2,
|
|
"legendFormat": "",
|
|
"refId": "B",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Sent Network Traffic per Container",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 1,
|
|
"value_type": "cumulative"
|
|
},
|
|
"transparent": false,
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": "",
|
|
"logBase": 10,
|
|
"max": 8,
|
|
"min": 0,
|
|
"show": false
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 1,
|
|
"grid": {},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 47
|
|
},
|
|
"id": 21,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": false,
|
|
"hideEmpty": true,
|
|
"hideZero": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": false,
|
|
"show": true,
|
|
"sideWidth": 150,
|
|
"sort": "avg",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "null",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "topk(10,sum(rate(container_network_receive_bytes_total{name=~\".+\"}[5m])) by (name))",
|
|
"format": "time_series",
|
|
"intervalFactor": 2,
|
|
"legendFormat": "{{name}}",
|
|
"refId": "A",
|
|
"step": 240
|
|
},
|
|
{
|
|
"expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])",
|
|
"format": "time_series",
|
|
"hide": true,
|
|
"intervalFactor": 2,
|
|
"legendFormat": "{{name}}",
|
|
"refId": "B",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Received Network Traffic per Container",
|
|
"tooltip": {
|
|
"msResolution": true,
|
|
"shared": true,
|
|
"sort": 1,
|
|
"value_type": "cumulative"
|
|
},
|
|
"transparent": false,
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": "prometheus",
|
|
"decimals": 2,
|
|
"editable": true,
|
|
"error": false,
|
|
"fill": 0,
|
|
"grid": {},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 54
|
|
},
|
|
"id": 8,
|
|
"isNew": true,
|
|
"legend": {
|
|
"alignAsTable": true,
|
|
"avg": true,
|
|
"current": true,
|
|
"max": false,
|
|
"min": false,
|
|
"rightSide": true,
|
|
"show": true,
|
|
"sideWidth": 200,
|
|
"sort": "current",
|
|
"sortDesc": true,
|
|
"total": false,
|
|
"values": true
|
|
},
|
|
"lines": true,
|
|
"linewidth": 2,
|
|
"links": [],
|
|
"nullPointMode": "connected",
|
|
"percentage": false,
|
|
"pointradius": 5,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Receive Traffic",
|
|
"metric": "network",
|
|
"refId": "A",
|
|
"step": 10
|
|
},
|
|
{
|
|
"expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
|
|
"format": "time_series",
|
|
"interval": "10s",
|
|
"intervalFactor": 1,
|
|
"legendFormat": "Transmit Traffic",
|
|
"metric": "network",
|
|
"refId": "B",
|
|
"step": 10
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeFrom": null,
|
|
"timeRegions": [],
|
|
"timeShift": null,
|
|
"title": "Pod Network I/O",
|
|
"tooltip": {
|
|
"msResolution": false,
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "cumulative"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"buckets": null,
|
|
"mode": "time",
|
|
"name": null,
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"format": "Bps",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
},
|
|
{
|
|
"format": "short",
|
|
"label": null,
|
|
"logBase": 1,
|
|
"max": null,
|
|
"min": null,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false,
|
|
"alignLevel": null
|
|
}
|
|
}
|
|
],
|
|
"refresh": "10s",
|
|
"schemaVersion": 16,
|
|
"style": "dark",
|
|
"tags": [
|
|
"custom"
|
|
],
|
|
"templating": {
|
|
"list": []
|
|
},
|
|
"time": {
|
|
"from": "now-3h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {
|
|
"refresh_intervals": [
|
|
"5s",
|
|
"10s",
|
|
"30s",
|
|
"1m",
|
|
"5m",
|
|
"15m",
|
|
"30m",
|
|
"1h",
|
|
"2h",
|
|
"1d"
|
|
],
|
|
"time_options": [
|
|
"5m",
|
|
"15m",
|
|
"1h",
|
|
"6h",
|
|
"12h",
|
|
"24h",
|
|
"2d",
|
|
"7d",
|
|
"30d"
|
|
]
|
|
},
|
|
"timezone": "browser",
|
|
"title": "Kubernetes cluster monitoring (via Prometheus)",
|
|
"uid": "82pBZCmRk",
|
|
"version": 2
|
|
} |