From 357a39a627de2ae29eca8e7c7e23aac84c95863f Mon Sep 17 00:00:00 2001 From: Carlos de Paula Date: Wed, 17 Jun 2020 15:39:59 -0300 Subject: [PATCH] Add options for pre-created PVs --- Readme.md | 4 +++- base_operator_stack.jsonnet | 7 +++++-- manifests/grafana-dashboardDefinitions.yaml | 17 ++++++++++++++--- manifests/prometheus-rules.yaml | 8 ++++---- vars.jsonnet | 7 ++++++- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/Readme.md b/Readme.md index 9a1cd00..a7ec134 100644 --- a/Readme.md +++ b/Readme.md @@ -36,7 +36,9 @@ There are also options to set the ingress domain suffix and enable persistence f The ingresses can use TLS with the default self-signed certificate from your Ingress controller by setting `TLSingress` to `true` and use a custom certificate by creating the files `server.crt` and `server.key` and enabling the `UseProvidedCerts` parameter at `vars.jsonnet`. -Changing these parameters require a rebuild of the manifests with `make`. +Persistence for Prometheus and Grafana can be enabled in the `enablePersistence` section. Setting each to `true` creates the volume PVCs. If no PV names are defined in `prometheusPV` and `grafanaPV`, the default StorageClass will be used to dynamically create the PVs. The sizes can be adjusted in `prometheusSizePV` and `grafanaSizePV`. + +Changing these parameters requires a rebuild of the manifests with `make` followed by `make deploy`. 
## Quickstart (non K3s) diff --git a/base_operator_stack.jsonnet b/base_operator_stack.jsonnet index dce203a..d408c73 100644 --- a/base_operator_stack.jsonnet +++ b/base_operator_stack.jsonnet @@ -69,6 +69,7 @@ local vars = import 'vars.jsonnet'; local pvc = k.core.v1.persistentVolumeClaim, prometheus+: { spec+: { + // Here one can use parameters from https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec replicas: $._config.prometheus.replicas, retention: '15d', externalUrl: 'http://' + $._config.urls.prom_ingress, @@ -78,7 +79,8 @@ local vars = import 'vars.jsonnet'; volumeClaimTemplate: pvc.new() + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + - pvc.mixin.spec.resources.withRequests({ storage: '20Gi' }), + pvc.mixin.spec.resources.withRequests({ storage: vars.enablePersistence.prometheusSizePV }) + + (if vars.enablePersistence.prometheusPV != '' then pvc.mixin.spec.withVolumeName(vars.enablePersistence.prometheusPV) else {}), // Uncomment below to define a StorageClass name //+ pvc.mixin.spec.withStorageClassName('nfs-master-ssd'), }, @@ -120,7 +122,8 @@ local vars = import 'vars.jsonnet'; pvc.mixin.metadata.withNamespace($._config.namespace) + pvc.mixin.metadata.withName('grafana-storage') + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + - pvc.mixin.spec.resources.withRequests({ storage: '2Gi' }), + pvc.mixin.spec.resources.withRequests({ storage: vars.enablePersistence.grafanaSizePV }) + + (if vars.enablePersistence.grafanaPV != '' then pvc.mixin.spec.withVolumeName(vars.enablePersistence.grafanaPV) else {}), } else {}, grafanaDashboards+:: $._config.grafanaDashboards, diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 2d0a140..1d3ce55 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -6467,6 +6467,7 @@ items: "fill": 1, "format": "percentunit", "id": 1, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -7275,7 
+7276,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7694,7 +7695,7 @@ items: ], "targets": [ { - "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)", + "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -7820,6 +7821,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 11, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -10236,6 +10238,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 9, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13185,6 +13188,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 6, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13283,6 +13287,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 7, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13381,6 +13386,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 8, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13479,6 +13485,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 9, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13577,6 +13584,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 10, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -13675,6 +13683,7 @@ items: "datasource": "$datasource", "fill": 10, "id": 11, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -14666,6 +14675,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 5, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -16824,6 +16834,7 @@ items: "datasource": "$datasource", "fill": 1, "id": 5, + "interval": "1m", "legend": { "avg": false, "current": false, @@ -17850,7 +17861,7 @@ items: "steppedLine": false, "targets": 
[ { - "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", + "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{workload}}", diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 932ca50..91c34ce 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -1391,6 +1391,10 @@ spec: {{ $labels.verb }} {{ $labels.resource }}. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh expr: | + cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} + > + 1 + and on (verb,resource) ( cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} > @@ -1402,10 +1406,6 @@ spec: ) ) > on (verb) group_left() 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0) - and on (verb,resource) - cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} - > - 1 for: 5m labels: severity: warning diff --git a/vars.jsonnet b/vars.jsonnet index 0b36718..d197cdd 100644 --- a/vars.jsonnet +++ b/vars.jsonnet @@ -52,10 +52,15 @@ TLSCertificate: importstr 'server.crt', TLSKey: importstr 'server.key', - // Setting these to false, defaults to emptyDirs + // Setting these to false, defaults to emptyDirs. 
+ // If using a pre-created PV, fill in the names. If blank, they will use the default StorageClass enablePersistence: { prometheus: false, grafana: false, + prometheusPV: '', + prometheusSizePV: '20Gi', + grafanaPV: '', + grafanaSizePV: '2Gi', }, // Grafana "from" email