From 422e0610f4447fe4ad37531dec5b8e94777e8061 Mon Sep 17 00:00:00 2001 From: Laur IVAN Date: Fri, 27 Feb 2026 00:18:56 +0100 Subject: [PATCH] refactor: migrate observability applications to Flux v2 HelmRelease and OCI repository definitions. --- .taskfiles/tools/Taskfile.yaml | 14 + Taskfile.yaml | 22 +- .../apps/observability/grafana/app.ks.yaml | 17 + .../grafana/app/externalsecret.yaml | 22 ++ .../grafanadashboard.yaml} | 3 +- .../grafana/app/helmrelease.yaml | 16 + .../grafana/app/kustomization.yaml | 9 + .../ocirepository.yaml} | 11 +- .../grafana/app/secrets.sops.yaml | 23 ++ .../observability/grafana/instance.ks.yaml | 18 +- .../instance/admin.externalsecret.yaml | 21 - .../grafana/instance/grafana.yaml | 96 +++-- ...ta-sources.yaml => grafanadatasource.yaml} | 9 +- .../grafana/instance/kustomization.yaml | 7 +- ...rvice-monitor.yaml => servicemonitor.yaml} | 11 +- .../observability/grafana/kustomization.yaml | 6 +- .../observability/grafana/operator.ks.yaml | 15 - .../grafana/operator/helm-release.yaml | 27 -- .../kube-prometheus-stack/app.ks.yaml | 25 +- .../app/alertmanagerconfig.yaml | 88 +++++ .../app/grafana-dashboards.yaml | 143 ------- .../app/grafanadashboard.yaml | 270 +++++++++++++ .../app/helm-release.yaml | 207 ---------- .../app/helmrelease.yaml | 165 ++++++++ .../app/kustomization.yaml | 19 + ...oci-repository.yaml => ocirepository.yaml} | 11 +- .../app/resources/flux-metrics.yaml | 370 ++++++++++++++++++ .../app/scrapeconfig.yaml | 31 ++ .../app/secrets.sops.yaml | 8 + .../apps/observability/kustomization.yaml | 6 +- 30 files changed, 1197 insertions(+), 493 deletions(-) create mode 100644 .taskfiles/tools/Taskfile.yaml create mode 100644 kubernetes/apps/observability/grafana/app.ks.yaml create mode 100644 kubernetes/apps/observability/grafana/app/externalsecret.yaml rename kubernetes/apps/observability/grafana/{operator/grafana-dashboard.yaml => app/grafanadashboard.yaml} (70%) create mode 100644 
kubernetes/apps/observability/grafana/app/helmrelease.yaml create mode 100644 kubernetes/apps/observability/grafana/app/kustomization.yaml rename kubernetes/apps/observability/grafana/{operator/oci-repository.yaml => app/ocirepository.yaml} (59%) create mode 100644 kubernetes/apps/observability/grafana/app/secrets.sops.yaml delete mode 100644 kubernetes/apps/observability/grafana/instance/admin.externalsecret.yaml rename kubernetes/apps/observability/grafana/instance/{data-sources.yaml => grafanadatasource.yaml} (60%) rename kubernetes/apps/observability/grafana/instance/{service-monitor.yaml => servicemonitor.yaml} (56%) delete mode 100644 kubernetes/apps/observability/grafana/operator.ks.yaml delete mode 100644 kubernetes/apps/observability/grafana/operator/helm-release.yaml create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/alertmanagerconfig.yaml delete mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/grafana-dashboards.yaml create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/grafanadashboard.yaml delete mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/helm-release.yaml create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml rename kubernetes/apps/observability/kube-prometheus-stack/app/{oci-repository.yaml => ocirepository.yaml} (61%) create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/resources/flux-metrics.yaml create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/scrapeconfig.yaml create mode 100644 kubernetes/apps/observability/kube-prometheus-stack/app/secrets.sops.yaml diff --git a/.taskfiles/tools/Taskfile.yaml b/.taskfiles/tools/Taskfile.yaml new file mode 100644 index 0000000..46256f6 --- /dev/null +++ b/.taskfiles/tools/Taskfile.yaml @@ -0,0 +1,14 @@ +--- +version: "3" + +tasks: + routes: + desc: 
List known routes in envoy gateway + dir: "{{.ROOT_DIR}}" + cmds: + - kubectl get httproute -A + preconditions: + - test -f {{.KUBECONFIG}} + - test -f {{.ROOT_DIR}}/.sops.yaml + - test -f {{.SCRIPTS_DIR}}/bootstrap-apps.sh + - test -f {{.SOPS_AGE_KEY_FILE}} diff --git a/Taskfile.yaml b/Taskfile.yaml index 0442ee1..9cc8253 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -1,29 +1,29 @@ --- -version: '3' +version: "3" set: [pipefail] shopt: [globstar] vars: - BOOTSTRAP_DIR: '{{.ROOT_DIR}}/bootstrap' - KUBERNETES_DIR: '{{.ROOT_DIR}}/kubernetes' - SCRIPTS_DIR: '{{.ROOT_DIR}}/scripts' - TALOS_DIR: '{{.ROOT_DIR}}/talos' - PRIVATE_DIR: '{{.ROOT_DIR}}/.private' - TALOSCONFIG: '{{.ROOT_DIR}}/talos/clusterconfig/talosconfig' + BOOTSTRAP_DIR: "{{.ROOT_DIR}}/bootstrap" + KUBERNETES_DIR: "{{.ROOT_DIR}}/kubernetes" + SCRIPTS_DIR: "{{.ROOT_DIR}}/scripts" + TALOS_DIR: "{{.ROOT_DIR}}/talos" + PRIVATE_DIR: "{{.ROOT_DIR}}/.private" + TALOSCONFIG: "{{.ROOT_DIR}}/talos/clusterconfig/talosconfig" env: - KUBECONFIG: '{{.ROOT_DIR}}/kubeconfig' - SOPS_AGE_KEY_FILE: '{{.ROOT_DIR}}/age.key' - TALOSCONFIG: '{{.TALOSCONFIG}}' + KUBECONFIG: "{{.ROOT_DIR}}/kubeconfig" + SOPS_AGE_KEY_FILE: "{{.ROOT_DIR}}/age.key" + TALOSCONFIG: "{{.TALOSCONFIG}}" includes: bootstrap: .taskfiles/bootstrap talos: .taskfiles/talos template: .taskfiles/template + tools: .taskfiles/tools tasks: - default: task --list reconcile: diff --git a/kubernetes/apps/observability/grafana/app.ks.yaml b/kubernetes/apps/observability/grafana/app.ks.yaml new file mode 100644 index 0000000..1ef22e2 --- /dev/null +++ b/kubernetes/apps/observability/grafana/app.ks.yaml @@ -0,0 +1,17 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: grafana + namespace: &namespace observability +spec: + interval: 1h + path: ./kubernetes/apps/observability/grafana/app + prune: true + sourceRef: + 
kind: GitRepository + name: flux-system + namespace: flux-system + targetNamespace: *namespace + wait: true diff --git a/kubernetes/apps/observability/grafana/app/externalsecret.yaml b/kubernetes/apps/observability/grafana/app/externalsecret.yaml new file mode 100644 index 0000000..2b87e26 --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/externalsecret.yaml @@ -0,0 +1,22 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/external-secrets.io/externalsecret_v1.json +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: grafana +spec: + secretStoreRef: + kind: ClusterSecretStore + name: bitwarden + target: + name: grafana-secret + template: + data: + # Authentik + GF_AUTH_GENERIC_OAUTH_CLIENT_ID: '{{ .GRAFANA_CLIENT_ID }}' + GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: '{{ .GRAFANA_CLIENT_SECRET }}' + # admin user + GF_SECURITY_ADMIN_PASSWORD: '{{ .GRAFANA_ADMIN_PASSWORD }}' + dataFrom: + - extract: + key: grafana diff --git a/kubernetes/apps/observability/grafana/operator/grafana-dashboard.yaml b/kubernetes/apps/observability/grafana/app/grafanadashboard.yaml similarity index 70% rename from kubernetes/apps/observability/grafana/operator/grafana-dashboard.yaml rename to kubernetes/apps/observability/grafana/app/grafanadashboard.yaml index 03ca540..6665fee 100644 --- a/kubernetes/apps/observability/grafana/operator/grafana-dashboard.yaml +++ b/kubernetes/apps/observability/grafana/app/grafanadashboard.yaml @@ -1,4 +1,5 @@ --- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: @@ -13,4 +14,4 @@ spec: inputName: DS_PROMETHEUS configMapRef: name: grafana-operator-dashboard - key: grafana-operator.json \ No newline at end of file + key: grafana-operator.json diff --git a/kubernetes/apps/observability/grafana/app/helmrelease.yaml 
b/kubernetes/apps/observability/grafana/app/helmrelease.yaml new file mode 100644 index 0000000..4562df9 --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/helmrelease.yaml @@ -0,0 +1,16 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: grafana-operator +spec: + interval: 1h + chartRef: + kind: OCIRepository + name: grafana-operator + values: + dashboard: + enabled: true + serviceMonitor: + enabled: true diff --git a/kubernetes/apps/observability/grafana/app/kustomization.yaml b/kubernetes/apps/observability/grafana/app/kustomization.yaml new file mode 100644 index 0000000..df980bb --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/kustomization.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./grafanadashboard.yaml + - ./helmrelease.yaml + - ./ocirepository.yaml + - ./secrets.sops.yaml diff --git a/kubernetes/apps/observability/grafana/operator/oci-repository.yaml b/kubernetes/apps/observability/grafana/app/ocirepository.yaml similarity index 59% rename from kubernetes/apps/observability/grafana/operator/oci-repository.yaml rename to kubernetes/apps/observability/grafana/app/ocirepository.yaml index ed34ec0..71df8b0 100644 --- a/kubernetes/apps/observability/grafana/operator/oci-repository.yaml +++ b/kubernetes/apps/observability/grafana/app/ocirepository.yaml @@ -1,13 +1,14 @@ --- +# yaml-language-server: $schema=https://schemas.tholinka.dev/source.toolkit.fluxcd.io/ocirepository_v1.json apiVersion: source.toolkit.fluxcd.io/v1 kind: OCIRepository metadata: name: grafana-operator spec: - interval: 15m - url: oci://ghcr.io/grafana/helm-charts/grafana-operator - ref: {tag: 5.21.4} - + interval: 1h layerSelector: mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip 
- operation: copy \ No newline at end of file + operation: copy + ref: + tag: 5.22.0 + url: oci://ghcr.io/grafana/helm-charts/grafana-operator diff --git a/kubernetes/apps/observability/grafana/app/secrets.sops.yaml b/kubernetes/apps/observability/grafana/app/secrets.sops.yaml new file mode 100644 index 0000000..cbf5202 --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/secrets.sops.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Secret +metadata: + name: ${APP}-volsync-secret +stringData: + #ENC[AES256_GCM,data:IvhoPyXVw75X8pU=,iv:1CWSPac9y9lBeLmxmHnrHeWFkAutYQXrQjtQEI8uEow=,tag:iOvsj3n1i757mnNDkL9sGA==,type:comment] + GF_SECURITY_ADMIN_PASSWORD: ENC[AES256_GCM,data:AznP+gnoEuJsyodnOT87BljKB/AbRnAluLZbdRcOqTN3jclA798=,iv:v3FD0nV6rUn1Vfn4lYYOMysre++6fnABj7SS/OyQxo8=,tag:/aSw3Hss8jW7+sFuhBpsIA==,type:str] +sops: + age: + - recipient: age1yzrqhl9dk8ljswpmzsqme3enad5kxxhsptdvecy3lwlq0ms80gaqxrctst + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBqRDRDT2JzZmF2RlcyREg5 + aEgyZ0QwNTJQK2JYbDBrNjRhT3BNSzdFZGlzCndQVloyK1RUU281S1Q2YnI4eXQv + RVoxa0UxOFNEVkZwQzB3ZUhTNHBMTWcKLS0tIGZLMTZ3YUs3d2FHWVBtczJzdzhp + dUtWdGJ0cjhjREI5YnVzVDk5VGJJS0kKpa+N5XC8a5/V/eUgqZoosxrio9CJMTYS + TzhILOHxY59zNtl4Jw7QtIy27jWki4+318WnQ2XGHO5yPUitc1yPuA== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2026-02-26T22:25:45Z" + mac: ENC[AES256_GCM,data:ktMVoEJxNiiHh4mSDG7c5IAWz04o+pl4iTXfqYloGy9CHRnnOdUV8bKbwPGiSExRm/IiRwKEWXcMsRKPJbz8QoXY8/W4slsMQ5dtnstTCDMJzp9ch2WipOce8C3nZOkTnwzDlmfpGMqr9AezeW/DnOryR6YbEbLeYH++VkMhD4E=,iv:MoI3qZZfivpvPopXkLW57ZJ9/7bFr+L621bBldG+6LI=,tag:60uNx8M+NBCjycVPDZ0ung==,type:str] + encrypted_regex: ^(data|stringData)$ + mac_only_encrypted: true + version: 3.11.0 diff --git a/kubernetes/apps/observability/grafana/instance.ks.yaml b/kubernetes/apps/observability/grafana/instance.ks.yaml index 122e5c3..d2f9b2d 100644 --- a/kubernetes/apps/observability/grafana/instance.ks.yaml +++ 
b/kubernetes/apps/observability/grafana/instance.ks.yaml @@ -1,21 +1,19 @@ ---- +# yaml-language-server: $schema=https://schemas.tholinka.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: name: grafana-instance + namespace: &namespace observability spec: + interval: 1h dependsOn: - - name: grafana-operator - - name: envoy-gateway - namespace: network-system - - name: openebs - namespace: storage-system + - name: grafana + - name: rook-ceph-cluster + namespace: rook-ceph path: ./kubernetes/apps/observability/grafana/instance + prune: true sourceRef: kind: GitRepository name: flux-system namespace: flux-system - targetNamespace: observability - interval: 1h - prune: true - wait: false + targetNamespace: *namespace diff --git a/kubernetes/apps/observability/grafana/instance/admin.externalsecret.yaml b/kubernetes/apps/observability/grafana/instance/admin.externalsecret.yaml deleted file mode 100644 index ed2ab71..0000000 --- a/kubernetes/apps/observability/grafana/instance/admin.externalsecret.yaml +++ /dev/null @@ -1,21 +0,0 @@ ---- -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: grafana-admin-password-secret -spec: - refreshInterval: 1h - - secretStoreRef: - name: onepassword - kind: ClusterSecretStore - - target: - name: grafana-admin-password-secret - creationPolicy: Owner - - data: - - secretKey: GF_SECURITY_ADMIN_PASSWORD - remoteRef: - key: Grafana Admin - property: password \ No newline at end of file diff --git a/kubernetes/apps/observability/grafana/instance/grafana.yaml b/kubernetes/apps/observability/grafana/instance/grafana.yaml index 16325f9..9346f25 100644 --- a/kubernetes/apps/observability/grafana/instance/grafana.yaml +++ b/kubernetes/apps/observability/grafana/instance/grafana.yaml @@ -1,30 +1,51 @@ --- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafana_v1beta1.json apiVersion: 
grafana.integreatly.org/v1beta1 kind: Grafana metadata: name: grafana labels: grafana.internal/instance: grafana - spec: config: analytics: - check_for_updates: "false" - check_for_plugin_updates: "false" - feedback_links_enabled: "false" - reporting_enabled: "false" - + check_for_updates: 'false' + check_for_plugin_updates: 'false' + feedback_links_enabled: 'false' + reporting_enabled: 'false' + auth: + disable_login_form: 'false' + signout_redirect_url: https://auth.tholinka.dev/application/o/grafana/end-session/ + oauth_auto_login: 'true' + oauth_allow_insecure_email_lookup: 'true' + auth.anonymous: + enabled: 'true' + org_id: '1' + org_name: Main Org. + org_role: Viewer + hide_version: 'true' + auth.generic_oauth: + name: authentik + enabled: 'true' + scopes: openid email profile + auth_url: https://auth.tholinka.dev/application/o/authorize/ + token_url: https://auth.tholinka.dev/application/o/token/ + api_url: https://auth.tholinka.dev/application/o/userinfo/ + # Optionally map user groups to Grafana roles + role_attribute_path: contains(groups[*], 'Grafana Admins') && 'Admin' || contains(groups[*], 'Grafana Editors') && 'Editor' || 'Viewer' + log: + mode: console + metrics: + enabled: 'true' + news: + news_feed_enabled: 'false' + plugins: + plugin_admin_enabled: 'false' + security: + angular_support_enabled: 'true' server: - enable_gzip: "true" - root_url: https://grafana.laurivan.com - - auth: { disable_login_form: "false" } - auth.anonymous: { enabled: "true" } - metrics: { enabled: "true" } - news: { news_feed_enabled: "false" } - plugins: { plugin_admin_enabled: "false" } - security: { angular_support_enabled: "true" } - + enable_gzip: 'true' + root_url: https://grafana.tholinka.dev deployment: spec: strategy: @@ -34,15 +55,29 @@ spec: containers: - name: grafana env: - - name: GF_SECURITY_ADMIN_PASSWORD + - name: &clientId GF_AUTH_GENERIC_OAUTH_CLIENT_ID valueFrom: secretKeyRef: - name: grafana-admin-password-secret - key: GF_SECURITY_ADMIN_PASSWORD + 
name: &secret grafana-secret + key: *clientId + - name: &clientSecret GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: *secret + key: *clientSecret + - name: &adminPass GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: *secret + key: *adminPass + - name: GF_DATE_FORMATS_USE_BROWSER_LOCALE + value: 'true' + - name: GF_SECURITY_COOKIE_SAMESITE + value: grafana securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true - capabilities: { drop: ["ALL"] } + capabilities: { drop: ['ALL'] } securityContext: runAsNonRoot: true runAsUser: 1000 @@ -53,23 +88,30 @@ spec: - name: grafana-data persistentVolumeClaim: claimName: grafana-pvc - httpRoute: + metadata: + annotations: + gethomepage.dev/enabled: 'true' + gethomepage.dev/group: Observability + gethomepage.dev/name: Grafana + gethomepage.dev/icon: grafana.svg + gethomepage.dev/pod-selector: app=grafana spec: - hostnames: ["grafana.laurivan.com"] + hostnames: + - grafana.tholinka.dev parentRefs: - - name: envoy-admin - namespace: network-system + - name: envoy-internal + namespace: network rules: - backendRefs: - name: grafana-service port: 3000 - persistentVolumeClaim: spec: - accessModes: ["ReadWriteOnce"] + accessModes: + - ReadWriteOnce resources: requests: storage: 10Gi - storageClassName: openebs-zfs + storageClassName: ceph-block disableDefaultSecurityContext: All diff --git a/kubernetes/apps/observability/grafana/instance/data-sources.yaml b/kubernetes/apps/observability/grafana/instance/grafanadatasource.yaml similarity index 60% rename from kubernetes/apps/observability/grafana/instance/data-sources.yaml rename to kubernetes/apps/observability/grafana/instance/grafanadatasource.yaml index 6de8ebf..78a1144 100644 --- a/kubernetes/apps/observability/grafana/instance/data-sources.yaml +++ b/kubernetes/apps/observability/grafana/instance/grafanadatasource.yaml @@ -1,4 +1,5 @@ --- +# yaml-language-server: 
$schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadatasource_v1beta1.json apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDatasource metadata: @@ -7,15 +8,14 @@ spec: instanceSelector: matchLabels: grafana.internal/instance: grafana - datasource: type: prometheus name: prometheus access: proxy isDefault: true - url: http://prometheus-operated:9090 - + url: http://prometheus-operated.observability.svc.cluster.local:9090 --- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadatasource_v1beta1.json apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDatasource metadata: @@ -24,11 +24,10 @@ spec: instanceSelector: matchLabels: grafana.internal/instance: grafana - datasource: type: alertmanager name: alertmanager access: proxy jsonData: implementation: prometheus - url: http://alertmanager-operated:9093 \ No newline at end of file + url: http://alertmanager-operated.observability.svc.cluster.local:9093 diff --git a/kubernetes/apps/observability/grafana/instance/kustomization.yaml b/kubernetes/apps/observability/grafana/instance/kustomization.yaml index 4799cfc..bd07360 100644 --- a/kubernetes/apps/observability/grafana/instance/kustomization.yaml +++ b/kubernetes/apps/observability/grafana/instance/kustomization.yaml @@ -1,9 +1,8 @@ --- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization - resources: - #- ./admin.externalsecret.yaml - - ./data-sources.yaml + - ./grafanadatasource.yaml - ./grafana.yaml - - ./service-monitor.yaml \ No newline at end of file + - ./servicemonitor.yaml diff --git a/kubernetes/apps/observability/grafana/instance/service-monitor.yaml b/kubernetes/apps/observability/grafana/instance/servicemonitor.yaml similarity index 56% rename from kubernetes/apps/observability/grafana/instance/service-monitor.yaml rename to kubernetes/apps/observability/grafana/instance/servicemonitor.yaml index 
03ebc8d..114bdba 100644 --- a/kubernetes/apps/observability/grafana/instance/service-monitor.yaml +++ b/kubernetes/apps/observability/grafana/instance/servicemonitor.yaml @@ -1,19 +1,18 @@ --- +# yaml-language-server: $schema=https://schemas.tholinka.dev/monitoring.coreos.com/servicemonitor_v1.json apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: name: grafana spec: - jobLabel: grafana - endpoints: - port: grafana path: /metrics honorLabels: true - + jobLabel: grafana namespaceSelector: - matchNames: ["monitoring-system"] - + matchNames: + - observability selector: matchLabels: - grafana.internal/instance: grafana \ No newline at end of file + grafana.internal/instance: grafana diff --git a/kubernetes/apps/observability/grafana/kustomization.yaml b/kubernetes/apps/observability/grafana/kustomization.yaml index 74ee524..2cd449f 100644 --- a/kubernetes/apps/observability/grafana/kustomization.yaml +++ b/kubernetes/apps/observability/grafana/kustomization.yaml @@ -2,6 +2,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -resources: [] - #- ./operator.ks.yaml - #- ./instance.ks.yaml \ No newline at end of file +resources: + - ./app.ks.yaml + - ./instance.ks.yaml diff --git a/kubernetes/apps/observability/grafana/operator.ks.yaml b/kubernetes/apps/observability/grafana/operator.ks.yaml deleted file mode 100644 index a453f2f..0000000 --- a/kubernetes/apps/observability/grafana/operator.ks.yaml +++ /dev/null @@ -1,15 +0,0 @@ ---- -apiVersion: kustomize.toolkit.fluxcd.io/v1 -kind: Kustomization -metadata: - name: grafana-operator -spec: - path: ./kubernetes/apps/observability/grafana/operator - sourceRef: - kind: GitRepository - name: flux-system - namespace: flux-system - targetNamespace: observability - interval: 1h - prune: true - wait: true \ No newline at end of file diff --git a/kubernetes/apps/observability/grafana/operator/helm-release.yaml b/kubernetes/apps/observability/grafana/operator/helm-release.yaml deleted file mode 
100644 index f983012..0000000 --- a/kubernetes/apps/observability/grafana/operator/helm-release.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: grafana-operator -spec: - interval: 10m - chartRef: - kind: OCIRepository - name: grafana-operator - - values: - replicas: 1 - resources: - requests: - cpu: 20m - memory: 32Mi - limits: - memory: 64Mi - - serviceMonitor: {enabled: true} - dashboard: {enabled: true} - - maxConcurrentReconciles: 4 - logging: - level: info - encoder: json \ No newline at end of file diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app.ks.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app.ks.yaml index 479ca4b..92843cf 100644 --- a/kubernetes/apps/observability/kube-prometheus-stack/app.ks.yaml +++ b/kubernetes/apps/observability/kube-prometheus-stack/app.ks.yaml @@ -1,3 +1,26 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app kube-prometheus-stack + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/observability/kube-prometheus-stack/app + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + wait: false + interval: 30m + retryInterval: 1m + timeout: 15m + --- apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization @@ -17,4 +40,4 @@ spec: - name: openebs namespace: storage-system - name: envoy-gateway - namespace: network-system \ No newline at end of file + namespace: network-system diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/alertmanagerconfig.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/alertmanagerconfig.yaml new file mode 100644 index 0000000..0d1c07d --- /dev/null +++ 
b/kubernetes/apps/observability/kube-prometheus-stack/app/alertmanagerconfig.yaml @@ -0,0 +1,88 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/monitoring.coreos.com/alertmanagerconfig_v1alpha1.json +apiVersion: monitoring.coreos.com/v1alpha1 +kind: AlertmanagerConfig +metadata: + name: alertmanager +spec: + route: + groupBy: ["alertname", "job"] + groupInterval: 10m + groupWait: 1m + receiver: pushover + repeatInterval: 12h + routes: + - receiver: "null" + matchers: + - name: alertname + value: InfoInhibitor + matchType: = + - receiver: heartbeat + groupInterval: 5m + groupWait: 0s + repeatInterval: 5m + matchers: + - name: alertname + value: Watchdog + matchType: = + - receiver: pushover + matchers: + - name: severity + value: critical + matchType: = + inhibitRules: + - equal: ["alertname", "namespace"] + sourceMatch: + - name: severity + value: critical + matchType: = + targetMatch: + - name: severity + value: warning + matchType: = + receivers: + - name: "null" + - name: heartbeat + webhookConfigs: + - urlSecret: + name: &secret kube-prometheus-stack-secret + key: HEALTHCHECKS_IO_HEARTBEAT_URL + sendResolved: true + - name: pushover + pushoverConfigs: + - html: true + message: |- + {{- range .Alerts }} + {{- if ne .Annotations.description "" }} + {{ .Annotations.description }} + {{- else if ne .Annotations.summary "" }} + {{ .Annotations.summary }} + {{- else if ne .Annotations.message "" }} + {{ .Annotations.message }} + {{- else }} + Alert description not available + {{- end }} + {{- if gt (len .Labels.SortedPairs) 0 }} + + {{- range .Labels.SortedPairs }} + {{ .Name }}: {{ .Value }} + {{- end }} + + {{- end }} + {{- end }} + monospace: false + priority: |- + {{ if eq .Status "firing" }}1{{ else }}0{{ end }} + sendResolved: true + sound: gamelan + title: >- + [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] + {{ .CommonLabels.alertname }} + ttl: 86400s + token: + name: *secret + key: 
ALERTMANAGER_PUSHOVER_TOKEN + userKey: + name: *secret + key: PUSHOVER_USER_KEY + urlTitle: View in Alertmanager diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/grafana-dashboards.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/grafana-dashboards.yaml deleted file mode 100644 index 7ffdd7d..0000000 --- a/kubernetes/apps/observability/kube-prometheus-stack/app/grafana-dashboards.yaml +++ /dev/null @@ -1,143 +0,0 @@ ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-api-server -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/15761/revisions/20/download - ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-coredns -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/15762/revisions/22/download - ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-global -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/15757/revisions/43/download - ---- -# yaml-language-server: 
$schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-namespaces -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/15758/revisions/44/download - ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-nodes -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/15759/revisions/40/download - ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-pods -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/15760/revisions/37/download - ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: kubernetes-volumes -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/11454/revisions/14/download - ---- -# yaml-language-server: 
$schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: node-exporter-full -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/1860/revisions/42/download - ---- -# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json -apiVersion: grafana.integreatly.org/v1beta1 -kind: GrafanaDashboard -metadata: - name: prometheus -spec: - allowCrossNamespaceImport: true - instanceSelector: - matchLabels: - grafana.internal/instance: grafana - datasources: - - datasourceName: prometheus - inputName: DS_PROMETHEUS - url: https://grafana.com/api/dashboards/19105/revisions/8/download \ No newline at end of file diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/grafanadashboard.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/grafanadashboard.yaml new file mode 100644 index 0000000..0efcada --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/grafanadashboard.yaml @@ -0,0 +1,270 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-addons-prometheus +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-addons-prometheus.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: 
grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-system-api-server +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-system-api-server.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-system-coredns +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-system-coredns.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-views-global +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-views-global.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-views-namespaces +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: 
https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-views-namespaces.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-views-nodes +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-views-nodes.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: k8s-views-pods +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://raw.githubusercontent.com/dotdc/grafana-dashboards-kubernetes/master/dashboards/k8s-views-pods.json +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-api-server +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/15761/revisions/20/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-coredns +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + 
datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/15762/revisions/22/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-global +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/15757/revisions/43/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-namespaces +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/15758/revisions/44/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-nodes +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/15759/revisions/40/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-pods +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: 
DS_PROMETHEUS + url: https://grafana.com/api/dashboards/15760/revisions/37/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-volumes +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/11454/revisions/14/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: etcd-storage +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_VICTORIAMETRICS + url: https://grafana.com/api/dashboards/22236/revisions/4/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-pvc +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/23233/revisions/6/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: node-exporter-full +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: 
https://grafana.com/api/dashboards/1860/revisions/42/download +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/grafana.integreatly.org/grafanadashboard_v1beta1.json +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: prometheus +spec: + allowCrossNamespaceImport: true + instanceSelector: + matchLabels: + grafana.internal/instance: grafana + datasources: + - datasourceName: prometheus + inputName: DS_PROMETHEUS + url: https://grafana.com/api/dashboards/19105/revisions/8/download diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/helm-release.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/helm-release.yaml deleted file mode 100644 index 9af16c7..0000000 --- a/kubernetes/apps/observability/kube-prometheus-stack/app/helm-release.yaml +++ /dev/null @@ -1,207 +0,0 @@ ---- -apiVersion: helm.toolkit.fluxcd.io/v2 -kind: HelmRelease -metadata: - name: kube-prometheus-stack -spec: - interval: 10m - chartRef: - kind: OCIRepository - name: kube-prometheus-stack - - values: - crds: { enabled: true } - cleanPrometheusOperatorObjectNames: true - - # ========================================================================== - # Alertmanager - # ========================================================================== - alertmanager: - enabled: true - route: - main: - enabled: true - hostnames: ["alertmanager.admin.mirceanton.com"] - parentRefs: - - name: envoy-admin - namespace: network-system - - alertmanagerSpec: - # alertmanagerConfiguration: {name: alertmanager} - # ======================================= - # App Settings - # ======================================= - externalUrl: "https://alertmanager.admin.mirceanton.com" - logFormat: json - logLevel: info - - # ======================================= - # Resources - # ======================================= - replicas: 1 - resources: - requests: - cpu: 20m - memory: 32Mi - limits: - memory: 128Mi - storage: - volumeClaimTemplate: - spec: 
- storageClassName: openebs-zfs - resources: - requests: - storage: 1Gi - - # ========================================================================== - # Prometheus Operator - # ========================================================================== - prometheusOperator: - enabled: true - # ======================================= - # App Settings - # ======================================= - logFormat: json - logLevel: info - - # ======================================= - # Resources - # ======================================= - resources: - requests: - cpu: 20m - memory: 64Mi - limits: - memory: 128Mi - - # ========================================================================== - # Prometheus - # ========================================================================== - prometheus: - enabled: true - route: - main: - enabled: true - hostnames: ["prometheus.admin.mirceanton.com"] - parentRefs: - - name: envoy-admin - namespace: network-system - - prometheusSpec: - # ======================================= - # App Settings - # ======================================= - externalUrl: "https://prometheus.admin.mirceanton.com" - enableAdminAPI: true - retention: 14d - retentionSize: 50GB - logLevel: info - logFormat: json - - # ======================================= - #? Replace default Prometheus image with prompp and - #? override 'unsupported Prometheus version' error - # ======================================= - version: v2.55.1 - image: - registry: mirror.gcr.io - repository: prompp/prompp - tag: 0.7.4 - - # ======================================= - # Security - # ======================================= - securityContext: - runAsNonRoot: true - runAsUser: 64535 - runAsGroup: 64535 - fsGroup: 64535 - - # ======================================= - #? Disable prometheus resource to be created with selectors based on - #? 
values in the helm deployment if a nil or {} value is provided - # ======================================= - podMonitorSelectorNilUsesHelmValues: false - probeSelectorNilUsesHelmValues: false - ruleSelectorNilUsesHelmValues: false - scrapeConfigSelectorNilUsesHelmValues: false - serviceMonitorSelectorNilUsesHelmValues: false - - # ======================================= - # Resources - # ======================================= - replicas: 1 - shards: 1 - resources: - requests: - cpu: 50m - memory: 128Mi - limits: - memory: 1Gi - storageSpec: - volumeClaimTemplate: - spec: - storageClassName: openebs-zfs - resources: - requests: - storage: 50Gi - - # ========================================================================== - # Grafana - # ========================================================================== - grafana: - enabled: false - forceDeployDashboards: true - operator: - dashboardsConfigMapRefEnabled: true - folder: monitoring-system - matchLabels: - grafana.internal/instance: grafana - - # ========================================================================== - # Exporters - # ========================================================================== - coreDns: { enabled: true } - kubelet: { enabled: true } - kubeApiServer: { enabled: true } - kubeControllerManager: { enabled: true } - kubeScheduler: { enabled: true } - kubeProxy: { enabled: true } - kubeEtcd: - enabled: true - service: - selector: - component: kube-apiserver - - nodeExporter: { enabled: true } - prometheus-node-exporter: - resources: - requests: - cpu: 20m - memory: 32Mi - limits: - memory: 64Mi - - kubeStateMetrics: { enabled: true } - kube-state-metrics: - resources: - requests: - cpu: 20m - memory: 64Mi - limits: - memory: 128Mi - - # ========================================================================== - # Additional Settings - # ========================================================================== - additionalPrometheusRulesMap: - oom-rules: - groups: - - name: oom 
- rules: - - alert: OomKilled - annotations: - summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes. - expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1 - labels: - severity: critical diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml new file mode 100644 index 0000000..f96fbf8 --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml @@ -0,0 +1,165 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/helm.toolkit.fluxcd.io/helmrelease_v2.json +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: kube-prometheus-stack +spec: + interval: 1h + chartRef: + kind: OCIRepository + name: kube-prometheus-stack + valuesFrom: + - kind: ConfigMap + name: flux-metrics-configmap + valuesKey: flux-metrics.yaml + values: + crds: + enabled: true + upgradeJob: + enabled: true + forceConflicts: true + cleanPrometheusOperatorObjectNames: true + alertmanager: + route: + main: + enabled: true + hostnames: + - alertmanager.tholinka.dev + parentRefs: + - name: envoy-internal + namespace: network + annotations: + gethomepage.dev/enabled: 'true' + gethomepage.dev/group: Observability + gethomepage.dev/name: Alertmanager + gethomepage.dev/icon: alertmanager.svg + gethomepage.dev/pod-selector: app.kubernetes.io/name=alertmanager + alertmanagerSpec: + alertmanagerConfiguration: + name: alertmanager + global: + resolveTimeout: 5m + externalUrl: https://alertmanager.tholinka.dev + resources: + requests: + cpu: 10m + memory: 64Mi + storage: + volumeClaimTemplate: + spec: + storageClassName: ceph-block + resources: + requests: + storage: 1Gi + 
kubeEtcd: + service: + selector: + component: kube-apiserver # etcd runs on control plane nodes + kubeProxy: + enabled: false + prometheusOperator: + resources: + requests: + cpu: 1m + memory: 64Mi + prometheus: + route: + main: + enabled: true + hostnames: + - prometheus.tholinka.dev + parentRefs: + - name: envoy-internal + namespace: network + annotations: + gethomepage.dev/enabled: 'true' + gethomepage.dev/group: Observability + gethomepage.dev/name: Prometheus + gethomepage.dev/icon: prometheus.svg + gethomepage.dev/pod-selector: operator.prometheus.io/name=kube-prometheus-stack + gethomepage.dev/widget.type: prometheus + gethomepage.dev/widget.url: 'http://kube-prometheus-stack-prometheus.observability.svc.cluster.local.:9090' + prometheusSpec: + externalUrl: https://prometheus.tholinka.dev + version: v2.55.1 # override 'unsupported Prometheus version' error for prompp + image: + registry: mirror.gcr.io + repository: prompp/prompp + tag: 0.7.5 + securityContext: + runAsNonRoot: true + runAsUser: 64535 + runAsGroup: 64535 + fsGroup: 64535 + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false + ruleSelectorNilUsesHelmValues: false + scrapeConfigSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + retention: 14d + retentionSize: 50GB + resources: + requests: + cpu: 800m + limits: + memory: 3000Mi + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: ceph-block + resources: + requests: + storage: 50Gi + prometheus-node-exporter: + fullnameOverride: node-exporter + resources: + requests: + cpu: 10m + memory: 32Mi + kube-state-metrics: + fullnameOverride: kube-state-metrics + resources: + requests: + cpu: 10m + memory: 64Mi + grafana: + enabled: false + forceDeployDashboards: true + additionalPrometheusRulesMap: + dockerhub-rules: + groups: + - name: dockerhub + rules: + - alert: DockerhubRateLimitRisk + annotations: + summary: Kubernetes cluster Dockerhub rate limit risk + expr: count (count 
by (image) (time() - container_last_seen{image=~"(docker.io).*",container!=""} < 30)) > 25 + labels: + severity: critical + oom-rules: + groups: + - name: oom + rules: + - alert: OomKilled + annotations: + summary: Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes. + expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1 + labels: + severity: critical + btrfs-rules: + groups: + - name: btrfs + rules: + - alert: BtrfsDeviceErrorsDetected + annotations: + summary: BTRFS device {{$labels.device}} on {{$labels.kubernetes_node}}@{{$labels.instance}} detected an error of type {{$labels.type}} + expr: node_btrfs_device_errors_total > 0 + labels: + severity: critical + - alert: BtrfsDeviceAlmostFull + annotations: + summary: BTRFS device {{$labels.device}}@{{$labels.instance}} has less than 1% of free space left + expr: (node_btrfs_device_unused_bytes{job="node-exporter"} / node_btrfs_device_size_bytes{job="node-exporter"} * 100) < 1 + labels: + severity: critical diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml new file mode 100644 index 0000000..cb3deea --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml @@ -0,0 +1,19 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./alertmanagerconfig.yaml + - ./secrets.sops.yaml + - ./grafanadashboard.yaml + - ./helmrelease.yaml + - ./ocirepository.yaml + # - ./scrapeconfig.yaml +configMapGenerator: + - name: flux-metrics-configmap + files: + - flux-metrics.yaml=./resources/flux-metrics.yaml 
+generatorOptions: + disableNameSuffixHash: true + annotations: + kustomize.toolkit.fluxcd.io/substitute: disabled diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/oci-repository.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/ocirepository.yaml similarity index 61% rename from kubernetes/apps/observability/kube-prometheus-stack/app/oci-repository.yaml rename to kubernetes/apps/observability/kube-prometheus-stack/app/ocirepository.yaml index 61b41bc..9a18f7e 100644 --- a/kubernetes/apps/observability/kube-prometheus-stack/app/oci-repository.yaml +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/ocirepository.yaml @@ -1,13 +1,14 @@ --- +# yaml-language-server: $schema=https://schemas.tholinka.dev/source.toolkit.fluxcd.io/ocirepository_v1.json apiVersion: source.toolkit.fluxcd.io/v1 kind: OCIRepository metadata: name: kube-prometheus-stack spec: - interval: 15m - url: oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack - ref: {tag: 81.5.0} - + interval: 1h layerSelector: mediaType: application/vnd.cncf.helm.chart.content.v1.tar+gzip - operation: copy \ No newline at end of file + operation: copy + ref: + tag: 82.4.0 + url: oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/resources/flux-metrics.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/resources/flux-metrics.yaml new file mode 100644 index 0000000..042db54 --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/resources/flux-metrics.yaml @@ -0,0 +1,370 @@ +--- +kube-state-metrics: + # For kube-prometheus-stacks that are already installed and configured with + # custom collectors, commenting out the collectors and extraArgs below will + # retain any existing kube-state-metrics configuration. 
+ # collectors: [] + # extraArgs: + # - --custom-resource-state-only=true + rbac: + extraRules: + - apiGroups: + - source.toolkit.fluxcd.io + - kustomize.toolkit.fluxcd.io + - helm.toolkit.fluxcd.io + - notification.toolkit.fluxcd.io + resources: + - gitrepositories + - buckets + - helmrepositories + - helmcharts + - ocirepositories + - kustomizations + - helmreleases + - alerts + - providers + - receivers + verbs: + - list + - watch + customResourceState: + enabled: true + config: + spec: + resources: + - groupVersionKind: + group: kustomize.toolkit.fluxcd.io + version: v1 + kind: Kustomization + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux Kustomization resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - lastAppliedRevision + source_name: + - spec + - sourceRef + - name + - groupVersionKind: + group: helm.toolkit.fluxcd.io + version: v2 + kind: HelmRelease + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux HelmRelease resource. 
+ each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - history + - '0' + - chartVersion + chart_name: + - status + - history + - '0' + - chartName + chart_app_version: + - status + - history + - '0' + - appVersion + chart_ref_name: + - spec + - chartRef + - name + chart_source_name: + - spec + - chart + - spec + - sourceRef + - name + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: GitRepository + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux GitRepository resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - artifact + - revision + url: + - spec + - url + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: Bucket + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux Bucket resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - artifact + - revision + endpoint: + - spec + - endpoint + bucket_name: + - spec + - bucketName + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: HelmRepository + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux HelmRepository resource. 
+ each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - artifact + - revision + url: + - spec + - url + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: HelmChart + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux HelmChart resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - artifact + - revision + chart_name: + - spec + - chart + chart_version: + - spec + - version + - groupVersionKind: + group: source.toolkit.fluxcd.io + version: v1 + kind: OCIRepository + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux OCIRepository resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + revision: + - status + - artifact + - revision + url: + - spec + - url + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1beta3 + kind: Alert + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux Alert resource. 
+ each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + suspended: + - spec + - suspend + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1beta3 + kind: Provider + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux Provider resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + suspended: + - spec + - suspend + - groupVersionKind: + group: notification.toolkit.fluxcd.io + version: v1 + kind: Receiver + metricNamePrefix: gotk + metrics: + - name: resource_info + help: The current state of a Flux Receiver resource. + each: + type: Info + info: + labelsFromPath: + name: + - metadata + - name + labelsFromPath: + exported_namespace: + - metadata + - namespace + ready: + - status + - conditions + - '[type=Ready]' + - status + suspended: + - spec + - suspend + webhook_path: + - status + - webhookPath diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/scrapeconfig.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/scrapeconfig.yaml new file mode 100644 index 0000000..a625387 --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/scrapeconfig.yaml @@ -0,0 +1,31 @@ +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json +apiVersion: monitoring.coreos.com/v1alpha1 +kind: ScrapeConfig +metadata: + name: &name node-exporter +spec: + staticConfigs: + - targets: + - nas.servers.internal:9100 + - pikvm.servers.internal:9100 + metricsPath: /metrics + relabelings: + - action: replace + targetLabel: job + replacement: *name +--- +# yaml-language-server: $schema=https://schemas.tholinka.dev/monitoring.coreos.com/scrapeconfig_v1alpha1.json +apiVersion: monitoring.coreos.com/v1alpha1 +kind: ScrapeConfig +metadata: + name: 
&name smartctl-exporter +spec: + staticConfigs: + - targets: + - nas.servers.internal:9633 + metricsPath: /metrics + relabelings: + - action: replace + targetLabel: job + replacement: *name diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/secrets.sops.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/secrets.sops.yaml new file mode 100644 index 0000000..0b3686d --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/secrets.sops.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: kube-prometheus-stack-secret +stringData: + ALERTMANAGER_PUSHOVER_TOKEN: "PLACEHOLDER" + PUSHOVER_USER_KEY: "PLACEHOLDER" + HEALTHCHECKS_IO_HEARTBEAT_URL: "PLACEHOLDER" diff --git a/kubernetes/apps/observability/kustomization.yaml b/kubernetes/apps/observability/kustomization.yaml index 1fb33de..e7f4256 100644 --- a/kubernetes/apps/observability/kustomization.yaml +++ b/kubernetes/apps/observability/kustomization.yaml @@ -6,5 +6,7 @@ namespace: observability resources: - ./namespace.yaml - ./headlamp - #- ./grafana - #- ./kube-prometheus-stack \ No newline at end of file + - ./grafana + #- ./kube-prometheus-stack +components: + - ../../components/repos/app-template