diff --git a/Makefile b/Makefile index ca262f4..71ed8f9 100644 --- a/Makefile +++ b/Makefile @@ -27,10 +27,6 @@ deploy: manifests sleep 40 kubectl apply -f ./manifests/ -ingressip: - @perl -p -i -e 's/^(\s*)\-\ host:.*/\1- host: alertmanager.${IP}.nip.io/g' manifests/ingress-alertmanager-main.yaml manifests/ingress-prometheus-k8s.yaml manifests/ingress-grafana.yaml - @echo "Ingress IPs changed to [service].${IP}.nip.io" - teardown: kubectl delete -f ./manifests/ @@ -50,3 +46,10 @@ ifeq (, $(shell which jsonnet)) @go get github.com/google/go-jsonnet/cmd/jsonnet @go get github.com/brancz/gojsontoyaml endif + +change_suffix: + @perl -p -i -e 's/^(\s*)\-\ host:.*/\1- host: alertmanager.${IP}.nip.io/g' manifests/ingress-alertmanager-main.yaml manifests/ingress-prometheus-k8s.yaml manifests/ingress-grafana.yaml + @echo "Ingress IPs changed to [service].${IP}.nip.io" + ${K3S} kubectl apply -f manifests/ingress-alertmanager-main.yaml + ${K3S} kubectl apply -f manifests/ingress-grafana.yaml + ${K3S} kubectl apply -f manifests/ingress-prometheus-k8s.yaml diff --git a/Readme.md b/Readme.md index fa23aaa..923f1b4 100644 --- a/Readme.md +++ b/Readme.md @@ -69,13 +69,18 @@ Now to deploy the monitoring stack on your K3s cluster, there are three paramete After changing these values, run `make` to build the manifests and `k3s kubectl apply -f manifests/` to apply the stack to your cluster. In case of errors on some resources, re-run the command. -If you already have the manifests and don't want to rebuilt from Jsonnet, just run `make ingressip IP="[IP-ADDRESS]"` to change the IP for the ingress routes for Grafana, Prometheus and Alertmanager. Re-apply the ingress manifests after this. +Now you can open the applications: -Now you can open the applications: * Grafana on [https://grafana.[your_node_ip].nip.io](https://grafana.[your_node_ip].nip.io), * Prometheus on [https://prometheus.[your_node_ip].nip.io](https://prometheus.[your_node_ip].nip.io) * Alertmanager on [https://alertmanager.[your_node_ip].nip.io](https://alertmanager.[your_node_ip].nip.io) +There are some dashboards that shows no values due to some cadvisor metrics not having the complete metadata. Check the open issues for more information. + +## Updating the ingress suffixes + +To avoid rebuilding all manifests, there is a make target to update the Ingress URL suffix to a different suffix (using nip.io) to match your host IP. Run `make change_suffix IP="[IP-ADDRESS]"` to change the ingress route IP for Grafana, Prometheus and Alertmanager and reapply the manifests. If you have a K3s cluster, run `make change_suffix IP="[IP-ADDRESS] K3S=k3s`. + ## Customizing The content of this project consists of a set of jsonnet files making up a library to be consumed. diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index c5d3ebd..48bcf8b 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -25743,1786 +25743,4 @@ items: metadata: name: grafana-dashboard-statefulset namespace: monitoring -- apiVersion: v1 - data: - traefik-dashboard.json: |- - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Traefik dashboard prometheus", - "editable": true, - "gnetId": 5851, - "graphTooltip": 0, - "id": 20, - "iteration": 1549118220486, - "links": [ - - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 21, - "panels": [ - - ], - "title": "General", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "prometheus", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 13, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(kube_pod_status_ready{namespace=\"$namespace\",condition=\"true\",pod=~\"traefik.*\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Traefik instances", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "prometheus", - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 37, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "time() - max(process_start_time_seconds{job=~\"traefik.*\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "prometheus", - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 4, - "x": 6, - "y": 1 - }, - "id": 39, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(traefik_entrypoint_request_duration_seconds_sum) / sum(traefik_entrypoint_requests_total) * 1000", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "Average response time", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "aliasColors": { - "Latency over 1 min": "rgb(9, 116, 190)", - "Latency over 5 min": "#bf1b00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 2, - "gridPos": { - "h": 7, - "w": 14, - "x": 10, - "y": 1 - }, - "id": 11, - "legend": { - "avg": true, - "current": false, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [ - - ], - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Latency over 5 min", - "yaxis": 1 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.$percentiles, sum(rate(traefik_entrypoint_request_duration_seconds_bucket{namespace=\"$namespace\", code=\"200\",method=\"GET\"}[5m])) by (le))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Latency over 1 min", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Global latency $percentiles th perc over 5 min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 29, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.$percentiles, rate(traefik_entrypoint_request_duration_seconds_bucket{namespace=\"$namespace\",code=\"200\",method=\"GET\"}[5m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Per node latency $percentiles th perc over 5 min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 17, - "panels": [ - - ], - "title": "Frontends (entrypoints)", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 41, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_entrypoint_requests_total[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Total requests", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Total requests over 5min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 7, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 23 - }, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(traefik_entrypoint_open_connections{namespace=\"$namespace\"}) by (method)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ method }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Open Connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 23 - }, - "id": 22, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_entrypoint_request_duration_seconds_bucket{namespace=\"$namespace\",le=\"0.1\",code=\"200\"}[5m])) by (job) / sum(rate(traefik_entrypoint_request_duration_seconds_count{namespace=\"$namespace\",code=\"200\"}[5m])) by (job)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Code 200", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Apdex score (over 5 min)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 24, - "panels": [ - - ], - "title": "Backends", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 7, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 25, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(traefik_backend_open_connections{namespace=\"$namespace\"}) by (method)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ method }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Open Connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 26, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_backend_request_duration_seconds_bucket{namespace=\"$namespace\",le=\"0.1\",code=\"200\"}[5m])) by (job) / sum(rate(traefik_backend_request_duration_seconds_count{namespace=\"$namespace\",code=\"200\"}[5m])) by (job)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Code 200", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Apdex score (over 5 min)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 38 - }, - "id": 15, - "panels": [ - - ], - "title": "HTTP Codes stats", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 39 - }, - "id": 5, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=~\"2..\"}[5m])) by (method, code)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{method}} : {{code}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Status code 2xx over 5min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 39 - }, - "id": 27, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=~\"5..\"}[5m])) by (method, code)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{method}} : {{code}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Status code 5xx over 5min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\"}[1m])) by (backend)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ backend }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Backend total requests over 1min per backend", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 6, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "sortDesc": false, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code!~\"2..|5..\"}[5m])) by (method, code)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ method }} : {{code}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Others status code over 5min", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 57 - }, - "id": 35, - "panels": [ - - ], - "title": "Pods ressources", - "type": "row" - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 58 - }, - "id": 31, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Max memory used", - "refId": "A" - }, - { - "expr": "avg(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Requested memory usage", - "refId": "B" - }, - { - "expr": "avg(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Limit memory usage", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Traefik max memory usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 58 - }, - "id": 33, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Max cpu used", - "refId": "A" - }, - { - "expr": "avg(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Requested cpu usage", - "refId": "B" - }, - { - "expr": "avg(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Limit cpu usage", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Traefik max CPU usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "schemaVersion": 16, - "style": "dark", - "tags": [ - "traefik" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "text": "kube-system", - "value": "kube-system" - }, - "datasource": "prometheus", - "definition": "", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(traefik_config_reloads_total, namespace)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "tags": [ - - ], - "text": "99", - "value": "99" - }, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "percentiles", - "options": [ - { - "selected": false, - "text": "95", - "value": "95" - }, - { - "selected": true, - "text": "99", - "value": "99" - } - ], - "query": "95,99", - "skipUrlSync": false, - "type": "custom" - } - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Traefik", - "uid": "000000113", - "version": 3 - } - kind: ConfigMap - metadata: - name: grafana-dashboard-traefik-dashboard - namespace: monitoring kind: ConfigMapList diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index 279587e..a8c7959 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -96,9 +96,6 @@ spec: - mountPath: /grafana-dashboard-definitions/0/statefulset name: grafana-dashboard-statefulset readOnly: false - - mountPath: /grafana-dashboard-definitions/0/traefik-dashboard - name: grafana-dashboard-traefik-dashboard - readOnly: false - mountPath: /etc/grafana name: grafana-config readOnly: false @@ -171,9 +168,6 @@ spec: - configMap: name: grafana-dashboard-statefulset name: grafana-dashboard-statefulset - - configMap: - name: grafana-dashboard-traefik-dashboard - name: grafana-dashboard-traefik-dashboard - name: grafana-config secret: secretName: grafana-config diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml index 251f18a..27a77bb 100644 --- a/manifests/kube-state-metrics-deployment.yaml +++ b/manifests/kube-state-metrics-deployment.yaml @@ -17,15 +17,46 @@ spec: spec: containers: - args: - - --port=8080 - - --telemetry-port=8081 + - --logtostderr + - --secure-listen-address=:8443 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8081/ + image: carlosedp/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy-main + ports: + - containerPort: 8443 + name: https-main + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --logtostderr + - --secure-listen-address=:9443 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:8082/ + image: carlosedp/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy-self + ports: + - containerPort: 9443 + name: https-self + resources: + limits: + cpu: 20m + memory: 40Mi + requests: + cpu: 10m + memory: 20Mi + - args: + - --host=127.0.0.1 + - --port=8081 + - --telemetry-host=127.0.0.1 + - --telemetry-port=8082 image: carlosedp/kube-state-metrics:v1.7.2 name: kube-state-metrics - ports: - - containerPort: 8080 - name: http-main - - containerPort: 8081 - name: http-self resources: limits: cpu: 100m diff --git a/manifests/kube-state-metrics-service.yaml b/manifests/kube-state-metrics-service.yaml index 2179e7e..84927af 100644 --- a/manifests/kube-state-metrics-service.yaml +++ b/manifests/kube-state-metrics-service.yaml @@ -8,11 +8,11 @@ metadata: spec: clusterIP: None ports: - - name: http-main - port: 8080 - targetPort: http-main - - name: http-self - port: 8081 - targetPort: http-self + - name: https-main + port: 8443 + targetPort: https-main + - name: https-self + port: 9443 + targetPort: https-self selector: app: kube-state-metrics diff --git a/manifests/kube-state-metrics-serviceMonitor.yaml b/manifests/kube-state-metrics-serviceMonitor.yaml index 6b4fc15..2100449 100644 --- a/manifests/kube-state-metrics-serviceMonitor.yaml +++ b/manifests/kube-state-metrics-serviceMonitor.yaml @@ -7,16 +7,18 @@ metadata: namespace: monitoring spec: endpoints: - - honorLabels: true + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true interval: 30s - port: http-main - scheme: http + port: https-main + scheme: https scrapeTimeout: 30s tlsConfig: insecureSkipVerify: true - - interval: 30s - port: http-self - scheme: http + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https-self + scheme: https tlsConfig: insecureSkipVerify: true jobLabel: k8s-app diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml index d261e01..b6a62ff 100644 --- a/manifests/node-exporter-daemonset.yaml +++ b/manifests/node-exporter-daemonset.yaml @@ -16,7 +16,7 @@ spec: spec: containers: - args: - - --web.listen-address=:9100 + - --web.listen-address=127.0.0.1:9100 - --path.procfs=/host/proc - --path.sysfs=/host/sys - --path.rootfs=/host/root @@ -24,9 +24,6 @@ spec: - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$ image: prom/node-exporter:v0.18.1 name: node-exporter - ports: - - containerPort: 9100 - name: http resources: limits: cpu: 250m @@ -45,6 +42,29 @@ spec: mountPropagation: HostToContainer name: root readOnly: true + - args: + - --logtostderr + - --secure-listen-address=$(IP):9100 + - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + - --upstream=http://127.0.0.1:9100/ + env: + - name: IP + valueFrom: + fieldRef: + fieldPath: status.podIP + image: carlosedp/kube-rbac-proxy:v0.4.1 + name: kube-rbac-proxy + ports: + - containerPort: 9100 + hostPort: 9100 + name: https + resources: + limits: + cpu: 20m + memory: 60Mi + requests: + cpu: 10m + memory: 20Mi hostNetwork: true hostPID: true nodeSelector: diff --git a/manifests/node-exporter-service.yaml b/manifests/node-exporter-service.yaml index 6f32c7d..1d728d7 100644 --- a/manifests/node-exporter-service.yaml +++ b/manifests/node-exporter-service.yaml @@ -8,8 +8,8 @@ metadata: spec: clusterIP: None ports: - - name: http + - name: https port: 9100 - targetPort: http + targetPort: https selector: app: node-exporter diff --git a/manifests/node-exporter-serviceMonitor.yaml b/manifests/node-exporter-serviceMonitor.yaml index 63b4ad2..89d65be 100644 --- a/manifests/node-exporter-serviceMonitor.yaml +++ b/manifests/node-exporter-serviceMonitor.yaml @@ -7,8 +7,9 @@ metadata: namespace: monitoring spec: endpoints: - - interval: 30s - port: http + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + interval: 30s + port: https relabelings: - action: replace regex: (.*) @@ -16,7 +17,9 @@ spec: sourceLabels: - __meta_kubernetes_pod_node_name targetLabel: instance - scheme: http + scheme: https + tlsConfig: + insecureSkipVerify: true jobLabel: k8s-app selector: matchLabels: diff --git a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryEndpoints.yaml b/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryEndpoints.yaml deleted file mode 100644 index 2739fe8..0000000 --- a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryEndpoints.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: Endpoints -metadata: - labels: - k8s-app: kube-controller-manager - name: kube-controller-manager-prometheus-discovery - namespace: kube-system -subsets: -- addresses: - - ip: 192.168.99.100 - ports: - - name: http-metrics - port: 10252 - protocol: TCP diff --git a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml index 7e5707b..9506973 100644 --- a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml +++ b/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml @@ -12,4 +12,4 @@ spec: port: 10252 targetPort: 10252 selector: - k8s-app: kube-controller-manager + component: kube-controller-manager diff --git a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryEndpoints.yaml b/manifests/prometheus-kubeSchedulerPrometheusDiscoveryEndpoints.yaml deleted file mode 100644 index 8aafbac..0000000 --- a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryEndpoints.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -kind: Endpoints -metadata: - labels: - k8s-app: kube-scheduler - name: kube-scheduler-prometheus-discovery - namespace: kube-system -subsets: -- addresses: - - ip: 192.168.99.100 - ports: - - name: http-metrics - port: 10251 - protocol: TCP diff --git a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml index 70f1551..b4843c7 100644 --- a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml +++ b/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml @@ -12,4 +12,4 @@ spec: port: 10251 targetPort: 10251 selector: - k8s-app: kube-scheduler + component: kube-scheduler diff --git a/manifests/prometheus-serviceMonitorKubelet.yaml b/manifests/prometheus-serviceMonitorKubelet.yaml index 91da377..71b95a5 100644 --- a/manifests/prometheus-serviceMonitorKubelet.yaml +++ b/manifests/prometheus-serviceMonitorKubelet.yaml @@ -8,25 +8,15 @@ metadata: spec: endpoints: - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true interval: 30s - port: https-metrics - scheme: https - tlsConfig: - insecureSkipVerify: true + port: http-metrics + scheme: http - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token honorLabels: true interval: 30s - metricRelabelings: - - action: drop - regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) - sourceLabels: - - __name__ path: /metrics/cadvisor - port: https-metrics - scheme: https - tlsConfig: - insecureSkipVerify: true + port: http-metrics + scheme: http jobLabel: k8s-app namespaceSelector: matchNames: diff --git a/manifests/traefikexporter-serviceMonitor.yaml b/manifests/traefikexporter-serviceMonitor.yaml deleted file mode 100644 index fdd3333..0000000 --- a/manifests/traefikexporter-serviceMonitor.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - k8s-app: traefik-ingress-lb - name: traefik-ingress-lb - namespace: monitoring -spec: - endpoints: - - interval: 30s - port: admin - scheme: http - jobLabel: k8s-app - namespaceSelector: - matchNames: - - kube-system - selector: - matchLabels: - k8s-app: traefik-ingress-lb diff --git a/vars.jsonnet b/vars.jsonnet index fcb9347..a3f2118 100644 --- a/vars.jsonnet +++ b/vars.jsonnet @@ -18,7 +18,7 @@ }, { name: 'traefikExporter', - enabled: true, + enabled: false, file: import 'traefik.jsonnet', }, { @@ -29,9 +29,8 @@ ], k3s: { - enabled: true, + enabled: false, master_ip: '192.168.99.100' - }, // Domain suffix for the ingresses