diff --git a/generate_ingress_auth.sh b/generate_ingress_auth.sh new file mode 100644 index 0000000..d3fb7af --- /dev/null +++ b/generate_ingress_auth.sh @@ -0,0 +1,6 @@ +#!/bin/bash +if [[ $# -eq 0 ]] ; then + echo "Run the script with the required auth user and namespace for the secret: ${0} [user] [namespace]" + exit 0 +fi +printf "${1}:`openssl passwd -apr1`\n" >> auth \ No newline at end of file diff --git a/grafana-dashboards/Node Exporter Server Metrics-1520350339243.json b/grafana-dashboards/Node Exporter Server Metrics-1520350339243.json deleted file mode 100644 index afcbba7..0000000 --- a/grafana-dashboards/Node Exporter Server Metrics-1520350339243.json +++ /dev/null @@ -1,1609 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Dashboard to view multiple servers", - "editable": true, - "gnetId": 405, - "graphTooltip": 0, - "id": null, - "iteration": 1520350315682, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 56, - "panels": [], - "title": "Main", - "type": "row" - }, - { - "content": "", - "editable": true, - "error": false, - "gridPos": { - "h": 1, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 11, - "links": [], - "minSpan": 4, - "mode": "html", - "repeat": "node", - "repeatDirection": "h", - "style": {}, - "title": "$node", - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 8, - "x": 0, - "y": 2 - }, - "id": 20, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": "node", - "repeatDirection": "h", - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(node_cpu{instance=~\"$node\", mode=\"system\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "A", - "step": 14400, - "target": "" - } - ], - "thresholds": "", - "title": "CPU Cores", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "editable": true, - "error": false, - "fill": 10, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 5 - }, - "id": 7, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 0, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": true, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (mode)(irate(node_cpu{mode=\"system\",instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{mode}}", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='user',instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "user", - "refId": "B", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='nice',instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "nice", - "refId": "C", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='iowait',instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "iowait", - "refId": "E", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='steal',instance=~'$node'}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "steal", - "refId": "H", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='idle',instance=~'$node'}[5m]))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "idle", - "refId": "D", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='irq',instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "irq", - "refId": "F", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='softirq',instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "softirq", - "refId": "G", - "step": 1200 - }, - { - "expr": "sum by (mode)(irate(node_cpu{mode='guest',instance=~'$node'}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "guest", - "refId": "I", - "step": 1200 - } - ], - "thresholds": [ - { - "colorMode": "custom", - "fill": true, - "fillColor": "rgba(216, 200, 27, 0.27)", - "op": "gt", - "value": 0 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "%", - "logBase": 1, - "max": 100, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "Slab": "#E5A8E2", - "Swap": "#E24D42" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 12 - }, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [ - { - "alias": "/Apps|Buffers|Cached|Free|Slab|SwapCached|PageTables|VmallocUsed/", - "fill": 5, - "stack": true - }, - { - "alias": "Swap", - "fill": 5, - "stack": true - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "( node_memory_MemTotal{instance=~'$node'} - node_memory_MemFree{instance=~'$node'} - node_memory_Buffers{instance=~'$node'} - node_memory_Cached{instance=~'$node'} - node_memory_SwapCached{instance=~'$node'} - node_memory_Slab{instance=~'$node'} - node_memory_PageTables{instance=~'$node'} - node_memory_VmallocUsed{instance=~'$node'} )", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Apps", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - }, - { - "expr": "node_memory_Buffers{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Buffers", - "refId": "B", - "step": 1200 - }, - { - "expr": "node_memory_Cached{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Cached", - "refId": "D", - "step": 1200 - }, - { - "expr": "node_memory_MemFree{instance=~'$node'}", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Free", - "refId": "E", - "step": 1200 - }, - { - "expr": "node_memory_Slab{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Slab", - "refId": "F", - "step": 1200 - }, - { - "expr": "node_memory_SwapCached{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "SwapCached", - "refId": "G", - "step": 1200 - }, - { - "expr": "node_memory_PageTables{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "PageTables", - "refId": "H", - "step": 1200 - }, - { - "expr": "node_memory_VmallocUsed{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "VmallocUsed", - "metric": "", - "refId": "I", - "step": 1200 - }, - { - "expr": "(node_memory_SwapTotal{instance=~'$node'} - node_memory_SwapFree{instance=~'$node'})", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Swap", - "metric": "", - "refId": "C", - "step": 1200 - }, - { - "expr": "node_memory_Committed_AS{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Committed", - "metric": "", - "refId": "J", - "step": 1200 - }, - { - "expr": "node_memory_Mapped{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Mapped", - "refId": "K", - "step": 1200 - }, - { - "expr": "node_memory_Active{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Active", - "metric": "", - "refId": "L", - "step": 1200 - }, - { - "expr": "node_memory_Inactive{instance=~'$node'}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Inactive", - "metric": "", - "refId": "M", - "step": 1200 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memory", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "GB", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 19 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1{instance=~\"$node\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "load", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Load", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 26 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100.0 - 100 * (node_filesystem_avail{instance=~'$node',device !~'tmpfs',device!~'by-uuid'} / node_filesystem_size{instance=~'$node',device !~'tmpfs',device!~'by-uuid'})", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{mountpoint}}", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Space Used", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "logBase": 1, - "max": 100, - "min": 0, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 33 - }, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_io_time_ms{instance=~\"$node\"}[5m])/10", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Utilization per Device", - "tooltip": { - "msResolution": false, - "shared": false, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "logBase": 1, - "max": 100, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 40 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [ - { - "alias": "/.*_read$/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_reads_completed{instance=~'$node'}[5m])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}}_read", - "metric": "", - "refId": "A", - "step": 2400, - "target": "" - }, - { - "expr": "irate(node_disk_writes_completed{instance=~'$node'}[5m])", - "intervalFactor": 2, - "legendFormat": "{{device}}_write", - "metric": "", - "refId": "B", - "step": 1200 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk IOs per Device", - "tooltip": { - "msResolution": false, - "shared": false, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "IO/second read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 47 - }, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [ - { - "alias": "/.*_read/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_sectors_read{instance=~'$node'}[5m]) * 512", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}}_read", - "refId": "B", - "step": 2400 - }, - { - "expr": "irate(node_disk_sectors_written{instance=~'$node'}[5m]) * 512", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}}_write", - "metric": "", - "refId": "A", - "step": 2400, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Throughput per Device", - "tooltip": { - "msResolution": false, - "shared": false, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "Bytes/second read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 54 - }, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_context_switches{instance=~\"$node\"}[5m])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "context switches", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Context Switches", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 61 - }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [ - { - "alias": "/.*_in/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_bytes{instance=~'$node'}[5m])*8", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{device}}_in", - "metric": "", - "refId": "A", - "step": 1200, - "target": "" - }, - { - "expr": "irate(node_network_transmit_bytes{instance=~'$node'}[5m])*8", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{device}}_out", - "refId": "B", - "step": 1200 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network Traffic", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bits", - "label": "bits in (-) / bits out (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 68 - }, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", - "intervalFactor": 2, - "legendFormat": "established", - "refId": "A", - "step": 1200, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Netstat", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 75 - }, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [ - { - "alias": "/.*Out.*/", - "transform": "negative-Y" - }, - { - "alias": "Udp_NoPorts", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m])", - "intervalFactor": 2, - "legendFormat": "Udp_InDatagrams", - "refId": "A", - "step": 1200, - "target": "" - }, - { - "expr": "irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m])", - "intervalFactor": 2, - "legendFormat": "Udp_InErrors", - "refId": "B", - "step": 1200 - }, - { - "expr": "irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Udp_OutDatagrams", - "refId": "C", - "step": 1200 - }, - { - "expr": "irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m])", - "intervalFactor": 2, - "legendFormat": "Udp_NoPorts", - "refId": "D", - "step": 1200 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "UDP Stats", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 82 - }, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "node", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_nf_conntrack_entries_limit{instance=~\"$node\"} - node_nf_conntrack_entries{instance=~\"$node\"}", - "intervalFactor": 2, - "legendFormat": "free", - "refId": "A", - "step": 1200, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Conntrack", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 89 - }, - "id": 88, - "panels": [], - "title": "Row title", - "type": "row" - } - ], - "refresh": "30s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "custom" - ], - "templating": { - "list": [ - { - "allFormat": "glob", - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "", - "multi": true, - "multiFormat": "regex values", - "name": "node", - "options": [], - "query": "label_values(node_boot_time, instance)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allFormat": "glob", - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "", - "multi": true, - "multiFormat": "regex values", - "name": "node_no_port", - "options": [], - "query": "label_values(node_boot_time, instance)", - "refresh": 1, - "regex": "/^(.*)\\:.*/", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "now": true, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Node Exporter Server Metrics", - "uid": "GxJ3JT6zk", - "version": 4 -} \ No newline at end of file diff --git a/grafana-dashboards/Traefik Realtime Metrics-1520350498858.json b/grafana-dashboards/Traefik Realtime Metrics-1520350498858.json deleted file mode 100644 index 55aff5e..0000000 --- a/grafana-dashboards/Traefik Realtime Metrics-1520350498858.json +++ /dev/null @@ -1,1180 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.2.2" - }, - { - "type": "panel", - "id": "grafana-piechart-panel", - "name": "Pie Chart", - "version": "1.2.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Visualize Traefik Metrics", - "editable": true, - "gnetId": 2870, - "graphTooltip": 0, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 8, - "x": 0, - "y": 0 - }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(traefik_entrypoint_request_duration_seconds_sum) / avg(traefik_entrypoint_requests_total) * 1000", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "300,600", - "title": "Average Response Time (ms)", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 8, - "x": 8, - "y": 0 - }, - "id": 3, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "min(time() - process_start_time_seconds{job=\"traefik-ingress-lb\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(26, 206, 22, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 9, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(rate(traefik_entrypoint_requests_total{code=\"404\",method=\"GET\"}[5m])) * 1000", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "metric": "traefik_requests_total", - "refId": "A", - "step": 60 - } - ], - "thresholds": "0,1", - "title": "404 Error Count last 5 Minutes", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "max" - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 2 - }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(traefik_entrypoint_requests_total{entrypoint=\"http\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{http}}", - "metric": "", - "refId": "A", - "step": 20 - }, - { - "expr": "sum(traefik_entrypoint_requests_total{entrypoint=\"https\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{https}}", - "refId": "B", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "Count", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Average response time alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(traefik_entrypoint_request_duration_seconds_sum) / avg(traefik_entrypoint_requests_total) * 1000", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Average response time (ms)", - "refId": "A", - "step": 240 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1000 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Average response time", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 2 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_entrypoint_requests_total[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{requests}}", - "refId": "A", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Requests in last 5 minutes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "fill": 1, - "gridPos": { - "h": 11, - "w": 8, - "x": 0, - "y": 10 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"http|https\",code=\"200\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{service}} {{method}} {{code}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Successful Status Code Count (5min)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "fill": 1, - "gridPos": { - "h": 11, - "w": 8, - "x": 8, - "y": 10 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"http|https\",code!=\"200\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{service}} {{method}} {{code}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bad Status Code Count (5m)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 11, - "w": 8, - "x": 16, - "y": 10 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(traefik_entrypoint_requests_total) by (code)", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "legendFormat": "{{code}}", - "refId": "A", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Requests by Request Code", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 13, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"http|https\",code=\"200\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{method}} : {{code}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Status code 200 over 5min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 15, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"http|https\",code!=\"200\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ method }} : {{code}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Others status code over 5min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "${DS_PROMETHEUS}", - "fontSize": "80%", - "format": "short", - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 17, - "interval": null, - "legend": { - "percentage": true, - "show": true, - "values": false - }, - "legendType": "Right side", - "links": [], - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "sum(rate(traefik_entrypoint_requests_total[5m])) by (entrypoint)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ entrypoint}}", - "refId": "A" - } - ], - "title": "Requests by service", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "aliasColors": {}, - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": "${DS_PROMETHEUS}", - "fontSize": "80%", - "format": "short", - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 28 - }, - "id": 19, - "interval": null, - "legend": { - "percentage": true, - "show": true, - "values": false - }, - "legendType": "Right side", - "links": [], - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "sum(rate(traefik_entrypoint_requests_total[5m])) by (entrypoint) ", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ entrypoint}}", - "refId": "A" - } - ], - "title": "Requests by protocol", - "type": "grafana-piechart-panel", - "valueName": "current" - } - ], - "refresh": "30s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "custom" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Traefik Realtime Metrics", - "uid": "000000010", - "version": 9 -} diff --git a/grafana-dashboards/kubernetes-cluster-dashboard.json b/grafana-dashboards/kubernetes-cluster-dashboard.json new file mode 100644 index 0000000..797dd38 --- /dev/null +++ b/grafana-dashboards/kubernetes-cluster-dashboard.json @@ -0,0 +1,1671 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", + "editable": true, + "gnetId": 162, + "graphTooltip": 1, + "id": 12, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster memory usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(0, 0, 0, 0)", + "rgb(210, 1, 1)", + "#890f02" + ], + "datasource": "prometheus", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "1.1", + "title": "Up Nodes", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 6, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster CPU usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "90%", + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 25, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "Uptime", + "colorMode": null, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/endpoint|job|namespace|pod|service/", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "preserveFormat": false, + "sanitize": false, + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(time() - node_boot_time_seconds)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Node Uptime", + "transform": "table", + "transparent": true, + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 15, + "panels": [], + "title": "Nodes", + "type": "row" + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 3500000000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "4m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Memory Usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3500000000 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 90 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "15m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", + "format": "time_series", + "intervalFactor": 3, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 90 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "prometheus", + "fontSize": "100%", + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 31, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "condition|container|daemonset|endpoint|namespace|node", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ALERTS{alertstate=\"firing\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + }, + { + "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}", + "format": "table", + "hide": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "title": "Active Alerts", + "transform": "table", + "type": "table" + }, + { + "dashboardFilter": "", + "folderId": null, + "gridPos": { + "h": 9, + "w": 5, + "x": 12, + "y": 17 + }, + "id": 27, + "limit": 10, + "links": [], + "nameFilter": "", + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 1, + "stateFilter": [], + "title": "Alarms", + "type": "alertlist" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "prometheus", + "decimals": null, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 17 + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Cluster Filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Node Down", + "noDataState": "alerting", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 4, + "w": 7, + "x": 17, + "y": 22 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(up{job=\"kubelet\"}) BY (job)", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Up Nodes", + "refId": "A" + }, + { + "expr": "count(up{job=\"kubelet\"})", + "format": "time_series", + "hide": true, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Total Nodes", + "refId": "B" + }, + { + "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "C" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 1 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Up Nodes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "CPU Temperature alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rpi_cpu_temperature_celsius", + "format": "time_series", + "intervalFactor": 5, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Temperature", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "celsius", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 17, + "panels": [], + "title": "Pods", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 270, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1m] ) ))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ container_name}}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod CPU usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 2, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 250, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (container_name, image))", + "format": "time_series", + "hide": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (container_name))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ container_name }}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 550, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])) by (name))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sent Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 1, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 21, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 150, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,sum(rate(container_network_receive_bytes_total{name=~\".+\"}[5m])) by (name))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 240 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Received Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 1, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 8, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Receive Traffic", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Transmit Traffic", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Pod Network i/o", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "custom" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kubernetes cluster monitoring (via Prometheus)", + "uid": "82pBZCmRk", + "version": 2 + } \ No newline at end of file diff --git a/grafana-dashboards/Prometheus2.0-1520350511205.json b/grafana-dashboards/prometheus-dashboard.json similarity index 96% rename from grafana-dashboards/Prometheus2.0-1520350511205.json rename to grafana-dashboards/prometheus-dashboard.json index de29bb0..07bdd50 100644 --- a/grafana-dashboards/Prometheus2.0-1520350511205.json +++ b/grafana-dashboards/prometheus-dashboard.json @@ -1,53 +1,4 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - }, - { - "name": "VAR_SCRAPE_INTERVAL", - "type": "constant", - "label": "Scrape interval seconds", - "value": "60", - "description": "" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "5.0.0" - } - ], "annotations": { "list": [ { @@ -95,8 +46,8 @@ "editable": true, "gnetId": 3681, "graphTooltip": 1, - "id": null, - "iteration": 1520350506982, + "id": 13, + "iteration": 1549118131383, "links": [ { "icon": "info", @@ -141,7 +92,7 @@ "rgba(237, 129, 40, 0.89)", "#bf1b00" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "decimals": 1, "format": "s", "gauge": { @@ -223,7 +174,7 @@ "rgba(237, 129, 40, 0.89)", "#bf1b00" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "format": "short", "gauge": { "maxValue": 1000000, @@ -304,7 +255,7 @@ "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "format": "none", "gauge": { "maxValue": 100, @@ -385,7 +336,7 @@ "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "decimals": 2, "format": "ms", "gauge": { @@ -483,7 +434,7 @@ "rgba(237, 129, 40, 0.89)", "#299c46" ], - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "decimals": 1, "format": "none", "gauge": { @@ -620,6 +571,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Query elapsed time", "tooltip": { @@ -653,7 +605,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -717,6 +673,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Head series created/deleted", "tooltip": { @@ -750,7 +707,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -898,6 +859,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Prometheus errors", "tooltip": { @@ -931,7 +893,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -1003,6 +969,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Scrape delay (counts with 1m scrape interval)", "tooltip": { @@ -1034,7 +1001,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -1096,6 +1067,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Rule evaulation duration", "tooltip": { @@ -1129,7 +1101,11 @@ "min": "0", "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -1200,6 +1176,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Request count", "tooltip": { @@ -1233,7 +1210,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -1291,6 +1272,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Request duration per handler", "tooltip": { @@ -1324,7 +1306,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -1380,6 +1366,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Request size by handler", "tooltip": { @@ -1413,7 +1400,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -1489,6 +1480,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Cont of concurent queries", "tooltip": { @@ -1522,7 +1514,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -1610,6 +1606,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Alert queue size", "tooltip": { @@ -1643,7 +1640,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -1700,6 +1701,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Count of discovered alertmanagers", "tooltip": { @@ -1733,7 +1735,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -1801,6 +1807,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Alerting errors", "tooltip": { @@ -1834,7 +1841,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -1855,7 +1866,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -1897,6 +1908,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Consul SD sync count", "tooltip": { @@ -1929,14 +1941,18 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -1978,6 +1994,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Marathon SD sync count", "tooltip": { @@ -2010,14 +2027,18 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -2059,6 +2080,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Kubernetes SD sync count", "tooltip": { @@ -2091,7 +2113,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -2189,6 +2215,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Service discovery errors", "tooltip": { @@ -2222,7 +2249,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -2243,7 +2274,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -2284,6 +2315,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Reloaded block from disk", "tooltip": { @@ -2316,7 +2348,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -2373,6 +2409,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Loaded data blocks", "tooltip": { @@ -2406,7 +2443,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -2463,6 +2504,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Time series total count", "tooltip": { @@ -2496,7 +2538,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -2548,6 +2594,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Samples Appended per second", "tooltip": { @@ -2581,7 +2628,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -2659,6 +2710,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Head chunks count", "tooltip": { @@ -2692,14 +2744,18 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -2740,6 +2796,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Length of head block", "tooltip": { @@ -2772,7 +2829,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -2834,6 +2895,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Head Chunks Created/Deleted per second", "tooltip": { @@ -2867,7 +2929,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -2888,7 +2954,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -2929,6 +2995,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Compaction duration", "tooltip": { @@ -2961,14 +3028,18 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -3009,6 +3080,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Go Garbage collection duration", "tooltip": { @@ -3041,14 +3113,18 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -3089,6 +3165,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "WAL truncate duration seconds", "tooltip": { @@ -3121,14 +3198,18 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": "prometheus", "fill": 1, "gridPos": { "h": 7, @@ -3169,6 +3250,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "WAL fsync duration seconds", "tooltip": { @@ -3201,7 +3283,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -3307,6 +3393,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Memory", "tooltip": { @@ -3340,7 +3427,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -3399,6 +3490,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Allocations per second", "tooltip": { @@ -3432,7 +3524,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -3488,6 +3584,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "CPU per second", "tooltip": { @@ -3523,7 +3620,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -4895,6 +4996,7 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Net errors", "tooltip": { @@ -4928,7 +5030,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "5m", @@ -5009,12 +5115,17 @@ ], "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", "refresh": 2, + "skipUrlSync": false, "type": "interval" }, { "allValue": null, - "current": {}, + "current": { + "text": "10.32.0.53:9090", + "value": "10.32.0.53:9090" + }, "datasource": "$datasource", + "definition": "", "hide": 0, "includeAll": false, "label": "Instance", @@ -5024,6 +5135,7 @@ "query": "label_values(prometheus_build_info, instance)", "refresh": 2, "regex": "", + "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", "tags": [], @@ -5033,19 +5145,20 @@ }, { "current": { - "value": "${VAR_SCRAPE_INTERVAL}", - "text": "${VAR_SCRAPE_INTERVAL}" + "text": "60", + "value": "60" }, "hide": 0, "label": "Scrape interval seconds", "name": "scrape_interval", "options": [ { - "value": "${VAR_SCRAPE_INTERVAL}", - "text": "${VAR_SCRAPE_INTERVAL}" + "text": "60", + "value": "60" } ], - "query": "${VAR_SCRAPE_INTERVAL}", + "query": "60", + "skipUrlSync": false, "type": "constant" }, { @@ -5060,6 +5173,7 @@ "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -5074,12 +5188,13 @@ "query": "influxdb", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" } ] }, "time": { - "from": "now-7d", + "from": "now-3h", "to": "now" }, "timepicker": { @@ -5110,5 +5225,5 @@ "timezone": "browser", "title": "Prometheus2.0", "uid": "XmsJC9mRz", - "version": 4 + "version": 1 } \ No newline at end of file diff --git a/grafana-dashboards/Kubernetes cluster monitoring.json b/grafana-dashboards/traefik-dashboard.json similarity index 53% rename from grafana-dashboards/Kubernetes cluster monitoring.json rename to grafana-dashboards/traefik-dashboard.json index aebed5c..2c0ad50 100644 --- a/grafana-dashboards/Kubernetes cluster monitoring.json +++ b/grafana-dashboards/traefik-dashboard.json @@ -1,52 +1,4 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "panel", - "id": "alertlist", - "name": "Alert List", - "version": "5.0.0" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.2.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], "annotations": { "list": [ { @@ -60,42 +12,53 @@ } ] }, - "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ", + "description": "Traefik dashboard prometheus", "editable": true, - "gnetId": 162, - "graphTooltip": 1, - "id": null, + "gnetId": 5851, + "graphTooltip": 0, + "id": 20, + "iteration": 1549118220486, "links": [], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 21, + "panels": [], + "title": "General", + "type": "row" + }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ - "rgba(50, 172, 45, 0.97)", + "#d44a3a", "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" + "#299c46" ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "percent", + "datasource": "prometheus", + "format": "none", "gauge": { "maxValue": 100, "minValue": 0, - "show": true, + "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 7, - "w": 8, + "w": 3, "x": 0, - "y": 0 + "y": 1 }, - "id": 4, + "id": 13, "interval": null, - "isNew": true, "links": [], "mappingType": 1, "mappingTypes": [ @@ -126,21 +89,19 @@ "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", - "show": false + "show": true }, "tableColumn": "", "targets": [ { - "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", + "expr": "count(kube_pod_status_ready{namespace=\"$namespace\",condition=\"true\",pod=~\"traefik.*\"})", "format": "time_series", - "interval": "10s", "intervalFactor": 1, - "refId": "A", - "step": 10 + "refId": "A" } ], - "thresholds": "65, 90", - "title": "Cluster memory usage", + "thresholds": "", + "title": "Traefik instances", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -154,15 +115,15 @@ }, { "cacheTimeout": null, - "colorBackground": true, + "colorBackground": false, "colorValue": false, "colors": [ - "rgba(0, 0, 0, 0)", - "rgb(210, 1, 1)", - "#890f02" + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" ], - "datasource": "${DS_PROMETHEUS}", - "format": "percentunit", + "datasource": "prometheus", + "format": "s", "gauge": { "maxValue": 100, "minValue": 0, @@ -171,12 +132,12 @@ "thresholdMarkers": true }, "gridPos": { - "h": 2, - "w": 8, - "x": 8, - "y": 0 + "h": 7, + "w": 3, + "x": 3, + "y": 1 }, - "id": 23, + "id": 37, "interval": null, "links": [], "mappingType": 1, @@ -213,18 +174,96 @@ "tableColumn": "", "targets": [ { - "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "expr": "time() - max(process_start_time_seconds{job=~\"traefik.*\"})", "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "", + "intervalFactor": 2, "refId": "A" } ], - "thresholds": "1.1", - "title": "Up Nodes", + "thresholds": "", + "title": "Uptime", "type": "singlestat", - "valueFontSize": "120%", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "prometheus", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 6, + "y": 1 + }, + "id": 39, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(traefik_entrypoint_request_duration_seconds_sum) / sum(traefik_entrypoint_requests_total) * 1000", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Average response time", + "type": "singlestat", + "valueFontSize": "80%", "valueMaps": [ { "op": "=", @@ -235,859 +274,63 @@ "valueName": "avg" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "aliasColors": { + "Latency over 1 min": "rgb(9, 116, 190)", + "Latency over 5 min": "#bf1b00" }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 2, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 6, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster CPU usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "columns": [], - "datasource": "${DS_PROMETHEUS}", - "fontSize": "90%", - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 2 - }, - "id": 25, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 2, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "Uptime", - "colorMode": null, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/endpoint|job|namespace|pod|service/", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "preserveFormat": false, - "sanitize": false, - "thresholds": [], - "type": "string", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(time() - node_boot_time_seconds)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "Node Uptime", - "transform": "table", - "transparent": true, - "type": "table" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 15, - "panels": [], - "title": "Nodes", - "type": "row" - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 3500000000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "4m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Memory Usage alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 3500000000 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 90 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "15m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Usage alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 8 + "w": 14, + "x": 10, + "y": 1 }, "id": 11, "legend": { - "avg": false, + "avg": true, "current": false, - "max": false, + "max": true, "min": false, "show": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)", - "format": "time_series", - "intervalFactor": 3, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 90 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "percent", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "columns": [], - "datasource": "${DS_PROMETHEUS}", - "fontSize": "100%", - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 31, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "link": false, - "pattern": "Time", - "type": "date" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "condition|container|daemonset|endpoint|namespace|node", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "ALERTS{alertstate=\"firing\"}", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - }, - { - "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}", - "format": "table", - "hide": true, - "intervalFactor": 1, - "refId": "B" - } - ], - "title": "Active Alerts", - "transform": "table", - "type": "table" - }, - { - "dashboardFilter": "", - "folderId": null, - "gridPos": { - "h": 9, - "w": 5, - "x": 12, - "y": 17 - }, - "id": 27, - "limit": 10, - "links": [], - "nameFilter": "", - "onlyAlertsOnDashboard": false, - "show": "current", - "sortOrder": 1, - "stateFilter": [], - "title": "Alarms", - "type": "alertlist" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 7, - "x": 17, - "y": 17 - }, - "id": 7, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "metric": "", - "refId": "A", - "step": 10 - } - ], - "thresholds": "65, 90", - "title": "Cluster Filesystem usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1 - ], - "type": "lt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "C", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Node Down", - "noDataState": "alerting", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 4, - "w": 7, - "x": 17, - "y": 22 - }, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Latency over 5 min", + "yaxis": 1 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(up{job=\"kubelet\"}) BY (job)", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Up Nodes", - "refId": "A" - }, - { - "expr": "count(up{job=\"kubelet\"})", - "format": "time_series", - "hide": true, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Total Nodes", - "refId": "B" - }, - { - "expr": "avg(up{job=\"kubelet\"}) BY (job)", + "expr": "histogram_quantile(0.$percentiles, sum(rate(traefik_entrypoint_request_duration_seconds_bucket{namespace=\"$namespace\", code=\"200\",method=\"GET\"}[5m])) by (le))", "format": "time_series", "hide": false, "intervalFactor": 1, - "refId": "C" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "lt", - "value": 1 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Up Nodes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "CPU Temperature alert", - "noDataState": "no_data", - "notifications": [] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rpi_cpu_temperature_celsius", - "format": "time_series", - "intervalFactor": 5, - "legendFormat": "{{instance}}", + "legendFormat": "Latency over 1 min", "refId": "A" } ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - } - ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "CPU Temperature", + "title": "Global latency $percentiles th perc over 5 min", "tooltip": { "shared": true, "sort": 0, @@ -1103,11 +346,11 @@ }, "yaxes": [ { - "format": "celsius", + "format": "ms", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -1116,332 +359,6 @@ "logBase": 1, "max": null, "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 17, - "panels": [], - "title": "Pods", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 3, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 270, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\"}[1m] ) ))", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ container_name}}", - "metric": "container_cpu", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pod CPU usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 40 - }, - "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 250, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (container_name, image))", - "format": "time_series", - "hide": true, - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "{{ container_name }}", - "metric": "container_memory_usage:sort_desc", - "refId": "A", - "step": 10 - }, - { - "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (container_name))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ container_name }}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pod memory usage", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 47 - }, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 550, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])) by (name))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ name }}", - "refId": "A", - "step": 240 - }, - { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Sent Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 1, - "value_type": "cumulative" - }, - "transparent": false, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, "show": false } ], @@ -1455,36 +372,28 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, + "datasource": "prometheus", "fill": 1, - "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 47 + "w": 24, + "x": 0, + "y": 8 }, - "id": 21, + "id": 29, "legend": { "alignAsTable": true, "avg": true, "current": false, - "hideEmpty": true, - "hideZero": true, - "max": false, + "max": true, "min": false, - "rightSide": false, + "rightSide": true, "show": true, - "sideWidth": 150, - "sort": "avg", - "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], "nullPointMode": "null", "percentage": false, @@ -1497,34 +406,23 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{name=~\".+\"}[5m])) by (name))", + "expr": "histogram_quantile(0.$percentiles, rate(traefik_entrypoint_request_duration_seconds_bucket{namespace=\"$namespace\",code=\"200\",method=\"GET\"}[5m]))", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Received Network Traffic per Container", + "title": "Per node latency $percentiles th perc over 5 min", "tooltip": { - "msResolution": true, "shared": true, - "sort": 1, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, - "transparent": false, "type": "graph", "xaxis": { "buckets": null, @@ -1535,7 +433,107 @@ }, "yaxes": [ { - "format": "Bps", + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 17, + "panels": [], + "title": "Frontends (entrypoints)", + "type": "row" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 41, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_entrypoint_requests_total[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total requests", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total requests over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", "label": null, "logBase": 1, "max": null, @@ -1561,38 +559,119 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, + "datasource": "prometheus", + "fill": 7, "gridPos": { "h": 7, - "w": 24, + "w": 12, "x": 0, - "y": 54 + "y": 23 }, - "id": 8, - "isNew": true, + "id": 19, "legend": { "alignAsTable": true, "avg": true, "current": true, - "max": false, + "max": true, "min": false, "rightSide": true, "show": true, - "sideWidth": 200, - "sort": "current", + "sort": "avg", "sortDesc": true, "total": false, "values": true }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "connected", + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_entrypoint_open_connections{namespace=\"$namespace\"}) by (method)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ method }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -1603,35 +682,691 @@ "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", + "expr": "sum(rate(traefik_entrypoint_request_duration_seconds_bucket{namespace=\"$namespace\",le=\"0.1\",code=\"200\"}[5m])) by (job) / sum(rate(traefik_entrypoint_request_duration_seconds_count{namespace=\"$namespace\",code=\"200\"}[5m])) by (job)", "format": "time_series", - "interval": "10s", "intervalFactor": 1, - "legendFormat": "Receive Traffic", - "metric": "network", - "refId": "A", - "step": 10 - }, - { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", - "format": "time_series", - "interval": "10s", - "intervalFactor": 1, - "legendFormat": "Transmit Traffic", - "metric": "network", - "refId": "B", - "step": 10 + "legendFormat": "Code 200", + "refId": "A" } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Pod Network i/o", + "title": "Apdex score (over 5 min)", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 24, + "panels": [], + "title": "Backends", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 7, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 31 + }, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_backend_open_connections{namespace=\"$namespace\"}) by (method)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ method }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Open Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 31 + }, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_request_duration_seconds_bucket{namespace=\"$namespace\",le=\"0.1\",code=\"200\"}[5m])) by (job) / sum(rate(traefik_backend_request_duration_seconds_count{namespace=\"$namespace\",code=\"200\"}[5m])) by (job)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Code 200", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Apdex score (over 5 min)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 15, + "panels": [], + "title": "HTTP Codes stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=~\"2..\"}[5m])) by (method, code)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status code 2xx over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=~\"5..\"}[5m])) by (method, code)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status code 5xx over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\"}[1m])) by (backend)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ backend }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Backend total requests over 1min per backend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sortDesc": false, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code!~\"2..|5..\"}[5m])) by (method, code)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ method }} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Others status code over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 57 + }, + "id": 35, + "panels": [], + "title": "Pods ressources", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 31, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max memory used", + "refId": "A" + }, + { + "expr": "avg(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requested memory usage", + "refId": "B" + }, + { + "expr": "avg(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Limit memory usage", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Traefik max memory usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -1650,6 +1385,106 @@ "min": null, "show": true }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 33, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max cpu used", + "refId": "A" + }, + { + "expr": "avg(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requested cpu usage", + "refId": "B" + }, + { + "expr": "avg(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Limit cpu usage", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Traefik max CPU usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, { "format": "short", "label": null, @@ -1665,14 +1500,68 @@ } } ], - "refresh": "10s", "schemaVersion": 16, "style": "dark", "tags": [ - "custom" + "traefik" ], "templating": { - "list": [] + "list": [ + { + "allValue": null, + "current": { + "text": "kube-system", + "value": "kube-system" + }, + "datasource": "prometheus", + "definition": "", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(traefik_config_reloads_total, namespace)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "tags": [], + "text": "99", + "value": "99" + }, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentiles", + "options": [ + { + "selected": false, + "text": "95", + "value": "95" + }, + { + "selected": true, + "text": "99", + "value": "99" + } + ], + "query": "95,99", + "skipUrlSync": false, + "type": "custom" + } + ] }, "time": { "from": "now-3h", @@ -1703,8 +1592,8 @@ "30d" ] }, - "timezone": "browser", - "title": "Kubernetes cluster monitoring (via Prometheus)", - "uid": "82pBZCmRk", - "version": 37 -} + "timezone": "", + "title": "Traefik", + "uid": "000000113", + "version": 3 +} \ No newline at end of file diff --git a/operator_stack.jsonnet b/operator_stack.jsonnet index 71150ef..42abbee 100644 --- a/operator_stack.jsonnet +++ b/operator_stack.jsonnet @@ -1,281 +1,249 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; -local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + { +local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + + // (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') + + (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet') + + (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') + + (import 'image_sources_versions.jsonnet') + + { _config+:: { - namespace: "monitoring", + namespace: "monitoring", - urls+:: { - prom_externalUrl: 'http://prometheus.internal.carlosedp.com', - alert_externalUrl: 'http://alertmanager.internal.carlosedp.com', - grafana_externalUrl: 'http://grafana.internal.carlosedp.com/', + urls+:: { + prom_externalUrl: 'http://prometheus.internal.carlosedp.com', + alert_externalUrl: 'http://alertmanager.internal.carlosedp.com', + grafana_externalUrl: 'http://grafana.internal.carlosedp.com/', - prom_ingress: 'prometheus.internal.carlosedp.com', - alert_ingress: 'alertmanager.internal.carlosedp.com', - grafana_ingress: 'grafana.internal.carlosedp.com', - }, - - versions+:: { - prometheus: "v2.5.0", - alertmanager: "v0.15.3", - kubeStateMetrics: "v1.5.0", - kubeRbacProxy: "v0.4.1", - addonResizer: "2.1", - nodeExporter: "v0.17.0", - prometheusOperator: "v0.28.0", - prometheusAdapter: "v0.4.1", - grafana: "v5.4.0", - configmapReloader: "v0.2.2", - prometheusConfigReloader: "v0.28.0", - }, - - imageRepos+:: { - prometheus: "carlosedp/prometheus", - alertmanager: "carlosedp/alertmanager", - kubeStateMetrics: "carlosedp/kube-state-metrics", - kubeRbacProxy: "carlosedp/kube-rbac-proxy", - addonResizer: "carlosedp/addon-resizer", - nodeExporter: "carlosedp/node_exporter", - prometheusOperator: "carlosedp/prometheus-operator", - prometheusAdapter: "carlosedp/k8s-prometheus-adapter", - grafana: "carlosedp/monitoring-grafana", - configmapReloader: "carlosedp/configmap-reload", - prometheusConfigReloader: "carlosedp/prometheus-config-reloader", - }, - - prometheus+:: { - names: 'k8s', - replicas: 1, - namespaces: ["default", "kube-system","monitoring","logging"], - }, - - alertmanager+:: { - name: 'main', - config: ||| - global: - resolve_timeout: 5m - route: - group_by: ['job'] - group_wait: 30s - group_interval: 5m - repeat_interval: 12h - receiver: 'null' - routes: - - match: - alertname: DeadMansSwitch - receiver: 'null' - receivers: - - name: 'null' - |||, - replicas: 1, - // Configure External URL's - alertmanager+: { - spec+: { - externalUrl: $._config.urls.alert_externalUrl, + prom_ingress: 'prometheus.internal.carlosedp.com', + alert_ingress: 'alertmanager.internal.carlosedp.com', + grafana_ingress: 'grafana.internal.carlosedp.com', + grafana_ingress_external: 'grafana.cloud.carlosedp.com', }, - }, - }, - kubeStateMetrics+:: { - collectors: '', // empty string gets a default set - scrapeInterval: '30s', - scrapeTimeout: '30s', + prometheus+:: { + names: 'k8s', + replicas: 1, + namespaces: ["default", "kube-system", "monitoring", "logging", "metallb-system"], + }, - baseCPU: '100m', - baseMemory: '150Mi', - cpuPerNode: '2m', - memoryPerNode: '30Mi', - }, + alertmanager+:: { + replicas: 1, + }, - grafana+:: { - config: { - sections: { - database: { path: '/data/grafana.db' }, - paths: { - data: '/var/lib/grafana', - logs: '/var/lib/grafana/log', - plugins: '/var/lib/grafana/plugins', - provisioning: '/etc/grafana/provisioning', - }, - session: { provider: 'memory' }, - 'auth.basic': {enabled: false}, - 'auth.anonymous': {enabled: false}, - smtp: { - enabled: true, - host: 'smtp-server.monitoring.svc:25', - user: '', - password: '', - from_address:'carlosedp@gmail.com', - from_name: 'Grafana Alert', - skip_verify: true + kubeStateMetrics+:: { + collectors: '', // empty string gets a default set + scrapeInterval: '30s', + scrapeTimeout: '30s', + + baseCPU: '100m', + baseMemory: '150Mi', + cpuPerNode: '2m', + memoryPerNode: '30Mi', + }, + + grafana+:: { + config: { + sections: { + // database: { path: '/data/grafana.db' }, + // paths: { + // data: '/var/lib/grafana', + // logs: '/var/lib/grafana/log', + // plugins: '/var/lib/grafana/plugins', + // provisioning: '/etc/grafana/provisioning', + // }, + session: { provider: 'memory' }, + 'auth.basic': {enabled: false}, + 'auth.anonymous': {enabled: false}, + smtp: { + enabled: true, + host: 'smtp-server.monitoring.svc:25', + user: '', + password: '', + from_address:'carlosedp@gmail.com', + from_name: 'Grafana Alert', + skip_verify: true + }, + }, }, }, - }, }, - //--------------------------------------- // End of _config //--------------------------------------- - }, prometheus+:: { - local pvc = k.core.v1.persistentVolumeClaim, - - prometheus+: { - spec+: { - retention: '15d', - externalUrl: $._config.urls.prom_externalUrl, - storage: { - volumeClaimTemplate: - pvc.new() + - pvc.mixin.spec.withAccessModes('ReadWriteOnce') + - pvc.mixin.spec.resources.withRequests({ storage: '20Gi' }) + - pvc.mixin.spec.withStorageClassName('nfs-ssd-node1'), - }, + local pvc = k.core.v1.persistentVolumeClaim, + prometheus+: { + spec+: { + retention: '15d', + externalUrl: $._config.urls.prom_externalUrl, + storage: { + volumeClaimTemplate: + pvc.new() + + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + + pvc.mixin.spec.resources.withRequests({ storage: '20Gi' }) + # Uncomment below to define a StorageClass name + #+ pvc.mixin.spec.withStorageClassName('nfs-master-ssd'), + }, + }, }, - }, }, - # Override command for Grafana to load ini from correct path - // grafana+:: { - // deployment+: - // { - // local pvc = k.core.v1.persistentVolumeClaim, - // spec+: { - // volumeClaimTemplate: - // pvc.new() + - // pvc.mixin.metadata.withNamespace($._config.namespace) + - // pvc.mixin.metadata.withName("grafana-storage") + - // pvc.mixin.spec.withAccessModes('ReadWriteMany') + - // pvc.mixin.spec.resources.withRequests({ storage: '2Gi' }) + - // pvc.mixin.spec.withStorageClassName('nfs-ssd-node1'), - // template+: { - // spec+: { - // containers: - // std.map( - // function(c) - // if c.name == 'grafana' then - // c { - // args+: [ - // '-config=/etc/grafana/grafana.ini', - // ], - // } - // else - // c, - // super.containers, - // ), - // }, - // }, - // }, - // }, - // }, + # Override command for Grafana persist data + // grafana+:: { + // local pvc = k.core.v1.persistentVolumeClaim, + // local deployment = k.apps.v1beta2.deployment, + // local container = deployment.mixin.spec.template.spec.containersType, + // local containerVolumeMount = container.volumeMountsType, + // local volume = deployment.mixin.spec.template.spec.volumesType, + // local grafanaStorage = pvc.new() + pvc.mixin.metadata.withNamespace($._config.namespace) + + // pvc.mixin.metadata.withName("grafana-storage") + + // pvc.mixin.spec.withAccessModes('ReadWriteMany') + + // pvc.mixin.spec.resources.withRequests({ storage: '2Gi' }), + // deployment+: { + // // local storageVolumeName = grafanaStorage, + // // local storageVolume = volume.fromPersistentVolumeClaim(grafanaStorage), + // // }, + // // { + // spec+: { + // template+: { + // spec+: { + // containers: + // std.map( + // function(c) + // if c.name == 'grafana' then + // c { + // volumeMounts+: [ + // containerVolumeMount.new("grafana-storage", "/var/lib/grafana"), + // ], + // } + // else + // c, + // super.containers, + // ), + // volumes+: [ + // volume.fromPersistentVolumeClaim(grafanaStorage, "grafana-storage"), + // ], + // }, + // }, + // }, + // }, + // }, - // Override command for addon-resizer due to change from parameter --threshold to --acceptance-offset - kubeStateMetrics+:: { - deployment+: { - spec+: { - template+: { - spec+: { - containers: - std.filterMap( - function(c) c.name == 'addon-resizer', - function(c) - if std.startsWith(c.name, 'addon-resizer') then - c { - command: [ - '/pod_nanny', - '--container=kube-state-metrics', - '--cpu=' + $._config.kubeStateMetrics.baseCPU, - '--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode, - '--memory=' + $._config.kubeStateMetrics.baseMemory, - '--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode, - '--acceptance-offset=5', - '--deployment=kube-state-metrics', - ], + // Add custom dashboards + grafanaDashboards+:: { + 'kubernetes-cluster-dashboard.json': (import 'grafana-dashboards/kubernetes-cluster-dashboard.json'), + 'prometheus-dashboard.json': (import 'grafana-dashboards/prometheus-dashboard.json'), + 'traefik-dashboard.json': (import 'grafana-dashboards/traefik-dashboard.json'), + }, + kubeStateMetrics+:: { + // Override command for addon-resizer due to change from parameter --threshold to --acceptance-offset + deployment+: { + spec+: { + template+: { + spec+: { + containers: + std.map( + function(c) + if std.startsWith(c.name, 'addon-resizer') then + c { + command: [ + '/pod_nanny', + '--container=kube-state-metrics', + '--cpu=100m', + '--extra-cpu=2m', + '--memory=150Mi', + '--extra-memory=30Mi', + '--acceptance-offset=5', + '--deployment=kube-state-metrics', + ], + } + else + c, + super.containers, + ), }, - super.containers, - ), - }, + }, + }, }, - }, }, - }, // Create ingress objects per application - ingress+: { - local secret = k.core.v1.secret, - local ingress = k.extensions.v1beta1.ingress, - local ingressTls = ingress.mixin.spec.tlsType, - local ingressRule = ingress.mixin.spec.rulesType, - local httpIngressPath = ingressRule.mixin.http.pathsType, + ingress+: { + local secret = k.core.v1.secret, + local ingress = k.extensions.v1beta1.ingress, + local ingressTls = ingress.mixin.spec.tlsType, + local ingressRule = ingress.mixin.spec.rulesType, + local httpIngressPath = ingressRule.mixin.http.pathsType, - 'alertmanager-main': - ingress.new() + - ingress.mixin.metadata.withName('alertmanager-main') + - ingress.mixin.metadata.withNamespace($._config.namespace) + - // ingress.mixin.metadata.withAnnotations({ - // 'nginx.ingress.kubernetes.io/auth-type': 'basic', - // 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', - // 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', - // }) + - ingress.mixin.spec.withRules( - ingressRule.new() + - ingressRule.withHost($._config.urls.alert_ingress) + - ingressRule.mixin.http.withPaths( - httpIngressPath.new() + - httpIngressPath.withPath('/') + - httpIngressPath.mixin.backend.withServiceName('alertmanager-main') + - httpIngressPath.mixin.backend.withServicePort('web') - ), - ), - 'grafana': - ingress.new() + - ingress.mixin.metadata.withName('grafana') + - ingress.mixin.metadata.withNamespace($._config.namespace) + - // ingress.mixin.metadata.withAnnotations({ - // 'nginx.ingress.kubernetes.io/auth-type': 'basic', - // 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', - // 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', - // }) + - ingress.mixin.spec.withRules( - ingressRule.new() + - ingressRule.withHost($._config.urls.grafana_ingress) + - ingressRule.mixin.http.withPaths( - httpIngressPath.new() + - httpIngressPath.withPath('/') + - httpIngressPath.mixin.backend.withServiceName('grafana') + - httpIngressPath.mixin.backend.withServicePort('http') - ), - ), - 'prometheus-k8s': - ingress.new() + - ingress.mixin.metadata.withName('prometheus-k8s') + - ingress.mixin.metadata.withNamespace($._config.namespace) + - // ingress.mixin.metadata.withAnnotations({ - // 'nginx.ingress.kubernetes.io/auth-type': 'basic', - // 'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth', - // 'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required', - // }) + - ingress.mixin.spec.withRules( - ingressRule.new() + - ingressRule.withHost($._config.urls.prom_ingress) + - ingressRule.mixin.http.withPaths( - httpIngressPath.new() + - httpIngressPath.withPath('/') + - httpIngressPath.mixin.backend.withServiceName('prometheus-k8s') + - httpIngressPath.mixin.backend.withServicePort('web') - ), - ), - }, - }; - // + { - // Create basic auth secret - replace 'auth' file with your own - // Create with htpasswd -c auth [USERNAME] -// ingress+:: { -// 'basic-auth-secret': -// secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) + -// secret.mixin.metadata.withNamespace($._config.namespace), -// }, -// }; + 'alertmanager-main': + ingress.new() + + ingress.mixin.metadata.withName('alertmanager-main') + + ingress.mixin.metadata.withNamespace($._config.namespace) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost($._config.urls.alert_ingress) + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.withPath('/') + + httpIngressPath.mixin.backend.withServiceName('alertmanager-main') + + httpIngressPath.mixin.backend.withServicePort('web') + ), + ), + 'grafana': + ingress.new() + + ingress.mixin.metadata.withName('grafana') + + ingress.mixin.metadata.withNamespace($._config.namespace) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost($._config.urls.grafana_ingress) + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.withPath('/') + + httpIngressPath.mixin.backend.withServiceName('grafana') + + httpIngressPath.mixin.backend.withServicePort('http') + ), + ), + 'prometheus-k8s': + ingress.new() + + ingress.mixin.metadata.withName('prometheus-k8s') + + ingress.mixin.metadata.withNamespace($._config.namespace) + + ingress.mixin.spec.withRules( + ingressRule.new() + + ingressRule.withHost($._config.urls.prom_ingress) + + ingressRule.mixin.http.withPaths( + httpIngressPath.new() + + httpIngressPath.withPath('/') + + httpIngressPath.mixin.backend.withServiceName('prometheus-k8s') + + httpIngressPath.mixin.backend.withServicePort('web') + ), + ), + // 'grafana-external': + // // Example external ingress with authentication + // ingress.new() + + // ingress.mixin.metadata.withName('grafana-external') + + // ingress.mixin.metadata.withNamespace($._config.namespace) + + // ingress.mixin.metadata.withLabels({'traffic-type': 'external'}) + + // ingress.mixin.metadata.withAnnotations({ + // 'ingress.kubernetes.io/auth-type': 'basic', + // 'ingress.kubernetes.io/auth-secret': 'basic-auth', + // }) + + // ingress.mixin.spec.withRules( + // ingressRule.new() + + // ingressRule.withHost($._config.urls.grafana_ingress_external) + + // ingressRule.mixin.http.withPaths( + // httpIngressPath.new() + + // httpIngressPath.withPath('/') + + // httpIngressPath.mixin.backend.withServiceName('grafana') + + // httpIngressPath.mixin.backend.withServicePort('http') + // ), + // ), + // 'basic-auth-secret': + // // First generate the auth secret with gen_auth.sh script + // secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) + + // secret.mixin.metadata.withNamespace($._config.namespace), + }, + }; { ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } + { ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +