mirror of
https://github.com/carlosedp/cluster-monitoring.git
synced 2025-08-27 13:46:16 +02:00
Merge 2053eeecea
into a8c6bba80d
This commit is contained in:
commit
715fec25fd
@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local utils = import 'utils.libsonnet';
|
||||
local vars = import 'vars.jsonnet';
|
||||
|
||||
@ -17,7 +17,7 @@ local vars = import 'vars.jsonnet';
|
||||
prometheus+:: {
|
||||
names: 'k8s',
|
||||
replicas: 1,
|
||||
namespaces: ['default', 'kube-system', 'monitoring'],
|
||||
namespaces: ['default', 'kube-system', vars._config.namespace],
|
||||
},
|
||||
|
||||
alertmanager+:: {
|
||||
@ -60,7 +60,7 @@ local vars = import 'vars.jsonnet';
|
||||
},
|
||||
},
|
||||
plugins: vars.grafana.plugins,
|
||||
env: vars.grafana.env
|
||||
env: vars.grafana.env,
|
||||
},
|
||||
},
|
||||
//---------------------------------------
|
||||
|
@ -4,10 +4,18 @@
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/kube-prometheus.git",
|
||||
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "main"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib.git"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
}
|
||||
],
|
||||
|
@ -8,38 +8,28 @@
|
||||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "57b4365eacda291b82e0d55ba7eec573a8198dda",
|
||||
"sum": "92DWADwGjnCfpZaL7Q07C0GZayxBziGla/O03qWea34="
|
||||
"version": "d039275e4916aceae1c137120882e01d857787ac",
|
||||
"sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/etcd.git",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
"remote": "https://github.com/etcd-io/etcd.git",
|
||||
"subdir": "contrib/mixin"
|
||||
}
|
||||
},
|
||||
"version": "d8c8f903eee10b8391abaef7758c38b2cd393c55",
|
||||
"sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
|
||||
"version": "e73f55d4e94666c99558baa2fd4e365aeaca4dc4",
|
||||
"sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/kube-prometheus.git",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
"remote": "https://github.com/grafana/grafana.git",
|
||||
"subdir": "grafana-mixin"
|
||||
}
|
||||
},
|
||||
"version": "17989b42aa10b1c6afa07043cb05bcd5ae492284",
|
||||
"sum": "2FR289B1LGUf5tTN4PXBj5TjRX7okSFxE8uHkSslzDQ="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/prometheus-operator.git",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41",
|
||||
"sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA="
|
||||
"version": "1120f9e255760a3c104b57871fcb91801e934382",
|
||||
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -48,8 +38,18 @@
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "8fb95bd89990e493a8534205ee636bfcb8db67bd",
|
||||
"sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak="
|
||||
"version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
|
||||
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/grafana/grafonnet-lib.git",
|
||||
"subdir": "grafonnet-7.0"
|
||||
}
|
||||
},
|
||||
"version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
|
||||
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -58,8 +58,8 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "881db2241f0c5007c3e831caf34b0c645202b4ab",
|
||||
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
||||
"version": "d68f9a6e0b1af7c4c4056dc2b43fb8f3bac01f43",
|
||||
"sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -69,8 +69,7 @@
|
||||
}
|
||||
},
|
||||
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
|
||||
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
|
||||
"name": "ksonnet"
|
||||
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -79,18 +78,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
|
||||
"sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
|
||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
||||
"version": "3c386687c1f8ceb6b79ff887c4a934e9cee1b90a",
|
||||
"sum": "H8lcnk7gQEUoRi58/xq+JTfd2PcjJUjMQHgxGklUiFY="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -99,8 +88,8 @@
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
|
||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
||||
"version": "71200632a6c97e9b87166dbb27489798a05effe3",
|
||||
"sum": "4PJ2ROxODsoYO/1Y70+dgLZVjW5zlfzB+TDpxJBHwaI="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -109,8 +98,50 @@
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
|
||||
"sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8="
|
||||
"version": "71200632a6c97e9b87166dbb27489798a05effe3",
|
||||
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/kube-prometheus.git",
|
||||
"subdir": "jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "37d00082289c587f5a02a343ba23cfbe167000e2",
|
||||
"sum": "5onAaPSrjnmgXIAsypnx0W/sIA7iTsHCeCjPrhGxj5A="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "df4cbd9526d8ff8e404a903b7ed2532847551d19",
|
||||
"sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
|
||||
"name": "prometheus-operator-mixin"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus-operator/prometheus-operator.git",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "df4cbd9526d8ff8e404a903b7ed2532847551d19",
|
||||
"sum": "wJ1E8XxYJ0RJrUuDNWLzE7bzo6JrH7P9q1lAu/xi4Ow="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/prometheus/alertmanager.git",
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "8afd462a9eaa3979bddf7bd6278bede4bc1f30e2",
|
||||
"sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -119,8 +150,8 @@
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "08ce3c6dd430deb51798826701a395e460620d60",
|
||||
"sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
|
||||
"version": "a3bd2e13052929663dbd7d680fab4a952efb1de6",
|
||||
"sum": "TwdaTm0Z++diiLyaKAAimmC6hBL7XbrJc0RHhBCpAdU="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -129,9 +160,30 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "74207c04655e1fd93eea0e9a5d2f31b1cbc4d3d0",
|
||||
"sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=",
|
||||
"version": "84c6f0e58444a452a5e2e19d14221409d2b9d790",
|
||||
"sum": "LRx0tbMnoE1p8KEn+i81j2YsA5Sgt3itE5Y6jBf5eOQ=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/pyrra-dev/pyrra.git",
|
||||
"subdir": "config/crd/bases"
|
||||
}
|
||||
},
|
||||
"version": "2584cefb8e6859eb9ee103df199e232cd0066aab",
|
||||
"sum": "d1550yhsX4VxdVN7b0gWT0cido/W90P6OGLzLqPwZcs="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/thanos-io/thanos.git",
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "3327c510076a77f876ac26e699d5252a61fc529a",
|
||||
"sum": "Io++1+lp1oQVoQiVRSCXUiGdTIRPV7aL6Ewgs3bShEs=",
|
||||
"name": "thanos-mixin"
|
||||
}
|
||||
],
|
||||
"legacyImports": false
|
||||
|
@ -1,10 +1,10 @@
|
||||
local utils = import 'utils.libsonnet';
|
||||
local vars = import 'vars.jsonnet';
|
||||
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet')
|
||||
+ (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet')
|
||||
+ (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')
|
||||
+ (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')
|
||||
local kp = (import 'kube-prometheus/main.libsonnet') + { values+:: { common+: { namespace: vars._config.namespace } } }
|
||||
+ (import 'kube-prometheus/addons/anti-affinity.libsonnet')
|
||||
+ (import 'kube-prometheus/platforms/kops-coredns.libsonnet')
|
||||
+ (import 'kube-prometheus/platforms/kubeadm.libsonnet')
|
||||
// Additional modules are loaded dynamically from vars.jsonnet
|
||||
+ utils.join_objects([module.file for module in vars.modules if module.enabled])
|
||||
// Load K3s customized modules
|
||||
@ -14,6 +14,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet')
|
||||
// Load image versions last to override default from modules
|
||||
+ (import 'image_sources_versions.jsonnet');
|
||||
|
||||
|
||||
// Generate core modules
|
||||
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) }
|
||||
// First generate operator resources except the serviceMonitors
|
||||
|
@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: Alertmanager
|
||||
metadata:
|
||||
labels:
|
||||
alertmanager: main
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: main
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -11,22 +15,36 @@ spec:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: alertmanager
|
||||
operator: In
|
||||
values:
|
||||
- main
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
namespaces:
|
||||
- monitoring
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
image: prom/alertmanager:v0.21.0
|
||||
image: quay.io/prometheus/alertmanager:v0.25.0
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
replicas: 1
|
||||
podMetadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
replicas: 3
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 100Mi
|
||||
requests:
|
||||
cpu: 4m
|
||||
memory: 100Mi
|
||||
securityContext:
|
||||
fsGroup: 2000
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
serviceAccountName: alertmanager-main
|
||||
version: v0.21.0
|
||||
version: 0.25.0
|
||||
|
42
manifests/alertmanager-networkPolicy.yaml
Normal file
42
manifests/alertmanager-networkPolicy.yaml
Normal file
@ -0,0 +1,42 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 9093
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: alertmanager
|
||||
ports:
|
||||
- port: 9094
|
||||
protocol: TCP
|
||||
- port: 9094
|
||||
protocol: UDP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
19
manifests/alertmanager-podDisruptionBudget.yaml
Normal file
19
manifests/alertmanager-podDisruptionBudget.yaml
Normal file
@ -0,0 +1,19 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
spec:
|
||||
maxUnavailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
139
manifests/alertmanager-prometheusRule.yaml
Normal file
139
manifests/alertmanager-prometheusRule.yaml
Normal file
@ -0,0 +1,139 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: alertmanager-main-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: alertmanager.rules
|
||||
rules:
|
||||
- alert: AlertmanagerFailedReload
|
||||
annotations:
|
||||
description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload
|
||||
summary: Reloading an Alertmanager configuration has failed.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerMembersInconsistent
|
||||
annotations:
|
||||
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent
|
||||
summary: A member of an Alertmanager cluster has not found all other cluster members.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
< on (namespace,service) group_left
|
||||
count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerFailedToSendAlerts
|
||||
annotations:
|
||||
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts
|
||||
summary: An Alertmanager instance failed to send notifications.
|
||||
expr: |
|
||||
(
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||
annotations:
|
||||
description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
|
||||
expr: |
|
||||
min by (namespace,service, integration) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||
annotations:
|
||||
description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
|
||||
expr: |
|
||||
min by (namespace,service, integration) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
annotations:
|
||||
description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent
|
||||
summary: Alertmanager instances within the same cluster have different configurations.
|
||||
expr: |
|
||||
count by (namespace,service) (
|
||||
count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
|
||||
)
|
||||
!= 1
|
||||
for: 20m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerClusterDown
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown
|
||||
summary: Half or more of the Alertmanager instances within the same cluster are down.
|
||||
expr: |
|
||||
(
|
||||
count by (namespace,service) (
|
||||
avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
|
||||
)
|
||||
/
|
||||
count by (namespace,service) (
|
||||
up{job="alertmanager-main",namespace="monitoring"}
|
||||
)
|
||||
)
|
||||
>= 0.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerClusterCrashlooping
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping
|
||||
summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
|
||||
expr: |
|
||||
(
|
||||
count by (namespace,service) (
|
||||
changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
|
||||
)
|
||||
/
|
||||
count by (namespace,service) (
|
||||
up{job="alertmanager-main",namespace="monitoring"}
|
||||
)
|
||||
)
|
||||
>= 0.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
@ -1,7 +1,12 @@
|
||||
apiVersion: v1
|
||||
data: {}
|
||||
kind: Secret
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
stringData:
|
||||
@ -12,21 +17,28 @@ stringData:
|
||||
- "equal":
|
||||
- "namespace"
|
||||
- "alertname"
|
||||
"source_match":
|
||||
"severity": "critical"
|
||||
"target_match_re":
|
||||
"severity": "warning|info"
|
||||
"source_matchers":
|
||||
- "severity = critical"
|
||||
"target_matchers":
|
||||
- "severity =~ warning|info"
|
||||
- "equal":
|
||||
- "namespace"
|
||||
- "alertname"
|
||||
"source_match":
|
||||
"severity": "warning"
|
||||
"target_match_re":
|
||||
"severity": "info"
|
||||
"source_matchers":
|
||||
- "severity = warning"
|
||||
"target_matchers":
|
||||
- "severity = info"
|
||||
- "equal":
|
||||
- "namespace"
|
||||
"source_matchers":
|
||||
- "alertname = InfoInhibitor"
|
||||
"target_matchers":
|
||||
- "severity = info"
|
||||
"receivers":
|
||||
- "name": "Default"
|
||||
- "name": "Watchdog"
|
||||
- "name": "Critical"
|
||||
- "name": "null"
|
||||
"route":
|
||||
"group_by":
|
||||
- "namespace"
|
||||
@ -35,10 +47,13 @@ stringData:
|
||||
"receiver": "Default"
|
||||
"repeat_interval": "12h"
|
||||
"routes":
|
||||
- "match":
|
||||
"alertname": "Watchdog"
|
||||
- "matchers":
|
||||
- "alertname = Watchdog"
|
||||
"receiver": "Watchdog"
|
||||
- "match":
|
||||
"severity": "critical"
|
||||
- "matchers":
|
||||
- "alertname = InfoInhibitor"
|
||||
"receiver": "null"
|
||||
- "matchers":
|
||||
- "severity = critical"
|
||||
"receiver": "Critical"
|
||||
type: Opaque
|
||||
|
@ -2,7 +2,11 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
alertmanager: main
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -10,7 +14,12 @@ spec:
|
||||
- name: web
|
||||
port: 9093
|
||||
targetPort: web
|
||||
- name: reloader-web
|
||||
port: 8080
|
||||
targetPort: reloader-web
|
||||
selector:
|
||||
alertmanager: main
|
||||
app: alertmanager
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
sessionAffinity: ClientIP
|
||||
|
@ -1,5 +1,12 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
|
@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: alertmanager
|
||||
name: alertmanager
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.25.0
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- interval: 30s
|
||||
port: web
|
||||
- interval: 30s
|
||||
port: reloader-web
|
||||
selector:
|
||||
matchLabels:
|
||||
alertmanager: main
|
||||
app.kubernetes.io/component: alert-router
|
||||
app.kubernetes.io/instance: main
|
||||
app.kubernetes.io/name: alertmanager
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,8 +1,15 @@
|
||||
apiVersion: v1
|
||||
data:
|
||||
grafana.ini: W2F1dGguYW5vbnltb3VzXQplbmFibGVkID0gZmFsc2UKW2F1dGguYmFzaWNdCmVuYWJsZWQgPSBmYWxzZQpbc2Vzc2lvbl0KcHJvdmlkZXIgPSBtZW1vcnkKW3NtdHBdCmVuYWJsZWQgPSB0cnVlCmZyb21fYWRkcmVzcyA9IG15ZW1haWxAZ21haWwuY29tCmZyb21fbmFtZSA9IEdyYWZhbmEgQWxlcnQKaG9zdCA9IHNtdHAtc2VydmVyLm1vbml0b3Jpbmcuc3ZjOjI1CnBhc3N3b3JkID0gCnNraXBfdmVyaWZ5ID0gdHJ1ZQp1c2VyID0gCg==
|
||||
kind: Secret
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana-config
|
||||
namespace: monitoring
|
||||
stringData:
|
||||
grafana.ini: |
|
||||
[date_formats]
|
||||
default_timezone = UTC
|
||||
type: Opaque
|
||||
|
@ -1,8 +1,27 @@
|
||||
apiVersion: v1
|
||||
data:
|
||||
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
|
||||
kind: Secret
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana-datasources
|
||||
namespace: monitoring
|
||||
stringData:
|
||||
datasources.yaml: |-
|
||||
{
|
||||
"apiVersion": 1,
|
||||
"datasources": [
|
||||
{
|
||||
"access": "proxy",
|
||||
"editable": false,
|
||||
"name": "prometheus",
|
||||
"orgId": 1,
|
||||
"type": "prometheus",
|
||||
"url": "http://prometheus-k8s.monitoring.svc:9090",
|
||||
"version": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
type: Opaque
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,6 +6,7 @@ data:
|
||||
"providers": [
|
||||
{
|
||||
"folder": "Default",
|
||||
"folderUid": "",
|
||||
"name": "0",
|
||||
"options": {
|
||||
"path": "/grafana-dashboard-definitions/0"
|
||||
@ -17,5 +18,10 @@ data:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana-dashboards
|
||||
namespace: monitoring
|
||||
|
@ -2,22 +2,35 @@ apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
checksum/grafana-config: adbde4cde1aa3ca57c408943af53e6f7
|
||||
checksum/grafana-dashboardproviders: d8fb24844314114bed088b83042b1bdb
|
||||
checksum/grafana-datasources: 0800bab7ea1e2d8ad5c09586d089e033
|
||||
labels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
spec:
|
||||
automountServiceAccountToken: false
|
||||
containers:
|
||||
- env: []
|
||||
image: grafana/grafana:7.0.3
|
||||
image: grafana/grafana:9.3.2
|
||||
name: grafana
|
||||
ports:
|
||||
- containerPort: 3000
|
||||
@ -33,6 +46,12 @@ spec:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 100Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
volumeMounts:
|
||||
- mountPath: /var/lib/grafana
|
||||
name: grafana-storage
|
||||
@ -43,6 +62,12 @@ spec:
|
||||
- mountPath: /etc/grafana/provisioning/dashboards
|
||||
name: grafana-dashboards
|
||||
readOnly: false
|
||||
- mountPath: /tmp
|
||||
name: tmp-plugins
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/alertmanager-overview
|
||||
name: grafana-dashboard-alertmanager-overview
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/apiserver
|
||||
name: grafana-dashboard-apiserver
|
||||
readOnly: false
|
||||
@ -52,8 +77,8 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/controller-manager
|
||||
name: grafana-dashboard-controller-manager
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/coredns-dashboard
|
||||
name: grafana-dashboard-coredns-dashboard
|
||||
- mountPath: /grafana-dashboard-definitions/0/grafana-overview
|
||||
name: grafana-dashboard-grafana-overview
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
|
||||
name: grafana-dashboard-k8s-resources-cluster
|
||||
@ -76,9 +101,6 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/kubernetes-cluster-dashboard
|
||||
name: grafana-dashboard-kubernetes-cluster-dashboard
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
readOnly: false
|
||||
@ -91,6 +113,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/nodes-darwin
|
||||
name: grafana-dashboard-nodes-darwin
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/nodes
|
||||
name: grafana-dashboard-nodes
|
||||
readOnly: false
|
||||
@ -100,9 +125,6 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/prometheus-dashboard
|
||||
name: grafana-dashboard-prometheus-dashboard
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
readOnly: false
|
||||
@ -115,9 +137,6 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/scheduler
|
||||
name: grafana-dashboard-scheduler
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
readOnly: false
|
||||
@ -125,8 +144,9 @@ spec:
|
||||
name: grafana-config
|
||||
readOnly: false
|
||||
nodeSelector:
|
||||
beta.kubernetes.io/os: linux
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
fsGroup: 65534
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
serviceAccountName: grafana
|
||||
@ -139,6 +159,12 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboards
|
||||
name: grafana-dashboards
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
name: tmp-plugins
|
||||
- configMap:
|
||||
name: grafana-dashboard-alertmanager-overview
|
||||
name: grafana-dashboard-alertmanager-overview
|
||||
- configMap:
|
||||
name: grafana-dashboard-apiserver
|
||||
name: grafana-dashboard-apiserver
|
||||
@ -149,8 +175,8 @@ spec:
|
||||
name: grafana-dashboard-controller-manager
|
||||
name: grafana-dashboard-controller-manager
|
||||
- configMap:
|
||||
name: grafana-dashboard-coredns-dashboard
|
||||
name: grafana-dashboard-coredns-dashboard
|
||||
name: grafana-dashboard-grafana-overview
|
||||
name: grafana-dashboard-grafana-overview
|
||||
- configMap:
|
||||
name: grafana-dashboard-k8s-resources-cluster
|
||||
name: grafana-dashboard-k8s-resources-cluster
|
||||
@ -172,9 +198,6 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
- configMap:
|
||||
name: grafana-dashboard-kubernetes-cluster-dashboard
|
||||
name: grafana-dashboard-kubernetes-cluster-dashboard
|
||||
- configMap:
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
@ -187,6 +210,9 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
- configMap:
|
||||
name: grafana-dashboard-nodes-darwin
|
||||
name: grafana-dashboard-nodes-darwin
|
||||
- configMap:
|
||||
name: grafana-dashboard-nodes
|
||||
name: grafana-dashboard-nodes
|
||||
@ -196,9 +222,6 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
- configMap:
|
||||
name: grafana-dashboard-prometheus-dashboard
|
||||
name: grafana-dashboard-prometheus-dashboard
|
||||
- configMap:
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
@ -211,9 +234,6 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-scheduler
|
||||
name: grafana-dashboard-scheduler
|
||||
- configMap:
|
||||
name: grafana-dashboard-statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
- configMap:
|
||||
name: grafana-dashboard-workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
|
29
manifests/grafana-networkPolicy.yaml
Normal file
29
manifests/grafana-networkPolicy.yaml
Normal file
@ -0,0 +1,29 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 3000
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
33
manifests/grafana-prometheusRule.yaml
Normal file
33
manifests/grafana-prometheusRule.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: grafana-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: GrafanaAlerts
|
||||
rules:
|
||||
- alert: GrafanaRequestsFailing
|
||||
annotations:
|
||||
message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }} is experiencing {{ $value | humanize }}% errors'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing
|
||||
expr: |
|
||||
100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
|
||||
/ ignoring (status_code)
|
||||
sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
|
||||
> 50
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: grafana_rules
|
||||
rules:
|
||||
- expr: |
|
||||
sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
|
||||
record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m
|
@ -2,7 +2,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -11,4 +14,6 @@ spec:
|
||||
port: 3000
|
||||
targetPort: http
|
||||
selector:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,5 +1,11 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
|
@ -1,6 +1,11 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 9.3.2
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -9,4 +14,4 @@ spec:
|
||||
port: http
|
||||
selector:
|
||||
matchLabels:
|
||||
app: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
|
@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
rules:
|
||||
- apiGroups:
|
||||
@ -14,6 +16,7 @@ rules:
|
||||
- nodes
|
||||
- pods
|
||||
- services
|
||||
- serviceaccounts
|
||||
- resourcequotas
|
||||
- replicationcontrollers
|
||||
- limitranges
|
||||
@ -24,16 +27,6 @@ rules:
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- daemonsets
|
||||
- deployments
|
||||
- replicasets
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
@ -85,6 +78,13 @@ rules:
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- endpointslices
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
@ -105,6 +105,8 @@ rules:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
- ingressclasses
|
||||
- ingresses
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
@ -115,3 +117,13 @@ rules:
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- rbac.authorization.k8s.io
|
||||
resources:
|
||||
- clusterrolebindings
|
||||
- clusterroles
|
||||
- rolebindings
|
||||
- roles
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
@ -2,55 +2,104 @@ apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/default-container: kube-state-metrics
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
spec:
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- --host=127.0.0.1
|
||||
- --port=8081
|
||||
- --telemetry-host=127.0.0.1
|
||||
- --telemetry-port=8082
|
||||
image: carlosedp/kube-state-metrics:v1.9.6
|
||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
|
||||
name: kube-state-metrics
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 250Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 190Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsUser: 65534
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8081/
|
||||
image: carlosedp/kube-rbac-proxy:v0.5.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.14.0
|
||||
name: kube-rbac-proxy-main
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https-main
|
||||
resources:
|
||||
limits:
|
||||
cpu: 40m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 20m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:9443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8082/
|
||||
image: carlosedp/kube-rbac-proxy:v0.5.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.14.0
|
||||
name: kube-rbac-proxy-self
|
||||
ports:
|
||||
- containerPort: 9443
|
||||
name: https-self
|
||||
resources:
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
serviceAccountName: kube-state-metrics
|
||||
|
31
manifests/kube-state-metrics-networkPolicy.yaml
Normal file
31
manifests/kube-state-metrics-networkPolicy.yaml
Normal file
@ -0,0 +1,31 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
- port: 9443
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
65
manifests/kube-state-metrics-prometheusRule.yaml
Normal file
65
manifests/kube-state-metrics-prometheusRule.yaml
Normal file
@ -0,0 +1,65 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: kube-state-metrics-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: kube-state-metrics
|
||||
rules:
|
||||
- alert: KubeStateMetricsListErrors
|
||||
annotations:
|
||||
description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors
|
||||
summary: kube-state-metrics is experiencing errors in list operations.
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
/
|
||||
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsWatchErrors
|
||||
annotations:
|
||||
description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors
|
||||
summary: kube-state-metrics is experiencing errors in watch operations.
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
/
|
||||
sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsShardingMismatch
|
||||
annotations:
|
||||
description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch
|
||||
summary: kube-state-metrics sharding is misconfigured.
|
||||
expr: |
|
||||
stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsShardsMissing
|
||||
annotations:
|
||||
description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
|
||||
summary: kube-state-metrics shards are missing.
|
||||
expr: |
|
||||
2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
|
||||
-
|
||||
sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
|
||||
!= 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
@ -2,8 +2,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -16,4 +18,6 @@ spec:
|
||||
port: 9443
|
||||
targetPort: https-self
|
||||
selector:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,8 +1,11 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
|
@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/version: 1.9.6
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.7.0
|
||||
name: kube-state-metrics
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -11,6 +13,11 @@ spec:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kube_endpoint_address_not_ready|kube_endpoint_address_available
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https-main
|
||||
relabelings:
|
||||
- action: labeldrop
|
||||
@ -28,4 +35,6 @@ spec:
|
||||
jobLabel: app.kubernetes.io/name
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-state-metrics
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,7 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
|
@ -1,7 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
|
@ -2,30 +2,41 @@ apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/default-container: node-exporter
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
spec:
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- --web.listen-address=127.0.0.1:9100
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/host/root
|
||||
- --path.udev.data=/host/root/run/udev/data
|
||||
- --no-collector.wifi
|
||||
- --no-collector.hwmon
|
||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
image: prom/node-exporter:v0.18.1
|
||||
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
- --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
|
||||
- --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
|
||||
image: quay.io/prometheus/node-exporter:v1.5.0
|
||||
name: node-exporter
|
||||
resources:
|
||||
limits:
|
||||
@ -34,13 +45,19 @@ spec:
|
||||
requests:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
add:
|
||||
- SYS_TIME
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
volumeMounts:
|
||||
- mountPath: /host/proc
|
||||
name: proc
|
||||
readOnly: false
|
||||
- mountPath: /host/sys
|
||||
mountPropagation: HostToContainer
|
||||
name: sys
|
||||
readOnly: false
|
||||
readOnly: true
|
||||
- mountPath: /host/root
|
||||
mountPropagation: HostToContainer
|
||||
name: root
|
||||
@ -48,14 +65,14 @@ spec:
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=[$(IP)]:9100
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:9100/
|
||||
env:
|
||||
- name: IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
image: carlosedp/kube-rbac-proxy:v0.5.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.14.0
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 9100
|
||||
@ -68,10 +85,20 @@ spec:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
priorityClassName: system-cluster-critical
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
@ -79,12 +106,13 @@ spec:
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /proc
|
||||
name: proc
|
||||
- hostPath:
|
||||
path: /sys
|
||||
name: sys
|
||||
- hostPath:
|
||||
path: /
|
||||
name: root
|
||||
updateStrategy:
|
||||
rollingUpdate:
|
||||
maxUnavailable: 10%
|
||||
type: RollingUpdate
|
||||
|
29
manifests/node-exporter-networkPolicy.yaml
Normal file
29
manifests/node-exporter-networkPolicy.yaml
Normal file
@ -0,0 +1,29 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 9100
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
316
manifests/node-exporter-prometheusRule.yaml
Normal file
316
manifests/node-exporter-prometheusRule.yaml
Normal file
@ -0,0 +1,316 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: node-exporter-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: node-exporter
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: |
|
||||
rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: |
|
||||
rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: |
|
||||
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeTextFileCollectorScrapeError
|
||||
annotations:
|
||||
description: Node Exporter text file collector failed to scrape.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
|
||||
summary: Node Exporter text file collector failed to scrape.
|
||||
expr: |
|
||||
node_textfile_scrape_error{job="node-exporter"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
description: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |
|
||||
(
|
||||
node_timex_offset_seconds{job="node-exporter"} > 0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
|
||||
)
|
||||
or
|
||||
(
|
||||
node_timex_offset_seconds{job="node-exporter"} < -0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
|
||||
)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockNotSynchronising
|
||||
annotations:
|
||||
description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |
|
||||
min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
|
||||
and
|
||||
node_timex_maxerror_seconds{job="node-exporter"} >= 16
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeRAIDDegraded
|
||||
annotations:
|
||||
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
|
||||
summary: RAID Array is degraded
|
||||
expr: |
|
||||
node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeRAIDDiskFailure
|
||||
annotations:
|
||||
description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
|
||||
summary: Failed device in RAID array
|
||||
expr: |
|
||||
node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFileDescriptorLimit
|
||||
annotations:
|
||||
description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
|
||||
summary: Kernel is predicted to exhaust file descriptors limit soon.
|
||||
expr: |
|
||||
(
|
||||
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFileDescriptorLimit
|
||||
annotations:
|
||||
description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
|
||||
summary: Kernel is predicted to exhaust file descriptors limit soon.
|
||||
expr: |
|
||||
(
|
||||
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: node-exporter.rules
|
||||
rules:
|
||||
- expr: |
|
||||
count without (cpu, mode) (
|
||||
node_cpu_seconds_total{job="node-exporter",mode="idle"}
|
||||
)
|
||||
record: instance:node_num_cpu:sum
|
||||
- expr: |
|
||||
1 - avg without (cpu) (
|
||||
sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m]))
|
||||
)
|
||||
record: instance:node_cpu_utilisation:rate5m
|
||||
- expr: |
|
||||
(
|
||||
node_load1{job="node-exporter"}
|
||||
/
|
||||
instance:node_num_cpu:sum{job="node-exporter"}
|
||||
)
|
||||
record: instance:node_load1_per_cpu:ratio
|
||||
- expr: |
|
||||
1 - (
|
||||
(
|
||||
node_memory_MemAvailable_bytes{job="node-exporter"}
|
||||
or
|
||||
(
|
||||
node_memory_Buffers_bytes{job="node-exporter"}
|
||||
+
|
||||
node_memory_Cached_bytes{job="node-exporter"}
|
||||
+
|
||||
node_memory_MemFree_bytes{job="node-exporter"}
|
||||
+
|
||||
node_memory_Slab_bytes{job="node-exporter"}
|
||||
)
|
||||
)
|
||||
/
|
||||
node_memory_MemTotal_bytes{job="node-exporter"}
|
||||
)
|
||||
record: instance:node_memory_utilisation:ratio
|
||||
- expr: |
|
||||
rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
|
||||
record: instance:node_vmstat_pgmajfault:rate5m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate5m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate5m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate5m
|
@ -2,8 +2,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -13,4 +15,6 @@ spec:
|
||||
port: 9100
|
||||
targetPort: https
|
||||
selector:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,5 +1,11 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
|
@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/version: v0.18.1
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 1.5.0
|
||||
name: node-exporter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -24,4 +26,6 @@ spec:
|
||||
jobLabel: app.kubernetes.io/name
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: node-exporter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,6 +1,11 @@
|
||||
apiVersion: apiregistration.k8s.io/v1
|
||||
kind: APIService
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: v1beta1.metrics.k8s.io
|
||||
spec:
|
||||
group: metrics.k8s.io
|
||||
|
@ -1,7 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
|
@ -2,10 +2,15 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
rbac.authorization.k8s.io/aggregate-to-admin: "true"
|
||||
rbac.authorization.k8s.io/aggregate-to-edit: "true"
|
||||
rbac.authorization.k8s.io/aggregate-to-view: "true"
|
||||
name: system:aggregated-metrics-reader
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups:
|
||||
- metrics.k8s.io
|
||||
|
@ -1,7 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
|
@ -1,7 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: resource-metrics:system:auth-delegator
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
|
@ -1,7 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: resource-metrics-server-resources
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups:
|
||||
- metrics.k8s.io
|
||||
|
@ -4,8 +4,26 @@ data:
|
||||
"resourceRules":
|
||||
"cpu":
|
||||
"containerLabel": "container"
|
||||
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
|
||||
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
||||
"containerQuery": |
|
||||
sum by (<<.GroupBy>>) (
|
||||
irate (
|
||||
container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s]
|
||||
)
|
||||
)
|
||||
"nodeQuery": |
|
||||
sum by (<<.GroupBy>>) (
|
||||
1 - irate(
|
||||
node_cpu_seconds_total{mode="idle"}[60s]
|
||||
)
|
||||
* on(namespace, pod) group_left(node) (
|
||||
node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}
|
||||
)
|
||||
)
|
||||
or sum by (<<.GroupBy>>) (
|
||||
1 - irate(
|
||||
windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m]
|
||||
)
|
||||
)
|
||||
"resources":
|
||||
"overrides":
|
||||
"namespace":
|
||||
@ -16,8 +34,21 @@ data:
|
||||
"resource": "pod"
|
||||
"memory":
|
||||
"containerLabel": "container"
|
||||
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
|
||||
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
|
||||
"containerQuery": |
|
||||
sum by (<<.GroupBy>>) (
|
||||
container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!=""}
|
||||
)
|
||||
"nodeQuery": |
|
||||
sum by (<<.GroupBy>>) (
|
||||
node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>}
|
||||
-
|
||||
node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}
|
||||
)
|
||||
or sum by (<<.GroupBy>>) (
|
||||
windows_cs_physical_memory_bytes{job="windows-exporter",<<.LabelMatchers>>}
|
||||
-
|
||||
windows_memory_available_bytes{job="windows-exporter",<<.LabelMatchers>>}
|
||||
)
|
||||
"resources":
|
||||
"overrides":
|
||||
"instance":
|
||||
@ -29,5 +60,10 @@ data:
|
||||
"window": "5m"
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: adapter-config
|
||||
namespace: monitoring
|
||||
|
@ -1,22 +1,46 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
name: prometheus-adapter
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
strategy:
|
||||
rollingUpdate:
|
||||
maxSurge: 1
|
||||
maxUnavailable: 0
|
||||
maxUnavailable: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
name: prometheus-adapter
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
spec:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
namespaces:
|
||||
- monitoring
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- --cert-dir=/var/run/serving-cert
|
||||
@ -25,10 +49,41 @@ spec:
|
||||
- --metrics-relist-interval=1m
|
||||
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
|
||||
- --secure-port=6443
|
||||
image: directxman12/k8s-prometheus-adapter:v0.7.0
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
|
||||
image: registry.k8s.io/prometheus-adapter/prometheus-adapter:v0.10.0
|
||||
livenessProbe:
|
||||
failureThreshold: 5
|
||||
httpGet:
|
||||
path: /livez
|
||||
port: https
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 5
|
||||
name: prometheus-adapter
|
||||
ports:
|
||||
- containerPort: 6443
|
||||
name: https
|
||||
readinessProbe:
|
||||
failureThreshold: 5
|
||||
httpGet:
|
||||
path: /readyz
|
||||
port: https
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 180Mi
|
||||
requests:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
volumeMounts:
|
||||
- mountPath: /tmp
|
||||
name: tmpfs
|
||||
|
23
manifests/prometheus-adapter-networkPolicy.yaml
Normal file
23
manifests/prometheus-adapter-networkPolicy.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- {}
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
17
manifests/prometheus-adapter-podDisruptionBudget.yaml
Normal file
17
manifests/prometheus-adapter-podDisruptionBudget.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
minAvailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
@ -1,6 +1,11 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: resource-metrics-auth-reader
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
|
@ -2,7 +2,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
name: prometheus-adapter
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -11,4 +14,6 @@ spec:
|
||||
port: 443
|
||||
targetPort: 6443
|
||||
selector:
|
||||
name: prometheus-adapter
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,5 +1,11 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
|
28
manifests/prometheus-adapter-serviceMonitor.yaml
Normal file
28
manifests/prometheus-adapter-serviceMonitor.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.10.0
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
@ -1,6 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
rules:
|
||||
- apiGroups:
|
||||
|
@ -1,6 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
@ -1,15 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-controller-manager
|
||||
name: kube-controller-manager-prometheus-discovery
|
||||
namespace: kube-system
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: http-metrics
|
||||
port: 10252
|
||||
targetPort: 10252
|
||||
selector:
|
||||
component: kube-controller-manager
|
@ -1,15 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
name: kube-dns-prometheus-discovery
|
||||
namespace: kube-system
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: metrics
|
||||
port: 9153
|
||||
targetPort: 9153
|
||||
selector:
|
||||
k8s-app: kube-dns
|
@ -1,15 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-scheduler
|
||||
name: kube-scheduler-prometheus-discovery
|
||||
namespace: kube-system
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: http-metrics
|
||||
port: 10251
|
||||
targetPort: 10251
|
||||
selector:
|
||||
component: kube-scheduler
|
40
manifests/prometheus-networkPolicy.yaml
Normal file
40
manifests/prometheus-networkPolicy.yaml
Normal file
@ -0,0 +1,40 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: grafana
|
||||
ports:
|
||||
- port: 9090
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
@ -4,7 +4,8 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -19,4 +20,5 @@ spec:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
|
19
manifests/prometheus-podDisruptionBudget.yaml
Normal file
19
manifests/prometheus-podDisruptionBudget.yaml
Normal file
@ -0,0 +1,19 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
spec:
|
||||
minAvailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
labels:
|
||||
prometheus: k8s
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: k8s
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -11,35 +15,45 @@ spec:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: prometheus
|
||||
operator: In
|
||||
values:
|
||||
- k8s
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
namespaces:
|
||||
- monitoring
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- name: alertmanager-main
|
||||
- apiVersion: v2
|
||||
name: alertmanager-main
|
||||
namespace: monitoring
|
||||
port: web
|
||||
enableFeatures: []
|
||||
externalLabels: {}
|
||||
externalUrl: http://prometheus.192.168.1.15.nip.io
|
||||
image: prom/prometheus:v2.19.1
|
||||
image: quay.io/prometheus/prometheus:v2.41.0
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
podMetadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
podMonitorNamespaceSelector: {}
|
||||
podMonitorSelector: {}
|
||||
probeNamespaceSelector: {}
|
||||
probeSelector: {}
|
||||
replicas: 1
|
||||
resources:
|
||||
requests:
|
||||
memory: 400Mi
|
||||
retention: 15d
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
ruleNamespaceSelector: {}
|
||||
ruleSelector: {}
|
||||
scrapeInterval: 30s
|
||||
scrapeTimeout: 30s
|
||||
securityContext:
|
||||
@ -49,4 +63,4 @@ spec:
|
||||
serviceAccountName: prometheus-k8s
|
||||
serviceMonitorNamespaceSelector: {}
|
||||
serviceMonitorSelector: {}
|
||||
version: v2.19.1
|
||||
version: 2.41.0
|
||||
|
280
manifests/prometheus-prometheusRule.yaml
Normal file
280
manifests/prometheus-prometheusRule.yaml
Normal file
@ -0,0 +1,280 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: prometheus-k8s-prometheus-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: prometheus
|
||||
rules:
|
||||
- alert: PrometheusBadConfig
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig
|
||||
summary: Failed Prometheus configuration reload.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PrometheusNotificationQueueRunningFull
|
||||
annotations:
|
||||
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull
|
||||
summary: Prometheus alert notification queue predicted to run full in less than 30m.
|
||||
expr: |
|
||||
# Without min_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
|
||||
>
|
||||
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers
|
||||
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
|
||||
expr: |
|
||||
(
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
* 100
|
||||
> 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusNotConnectedToAlertmanagers
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers
|
||||
summary: Prometheus is not connected to any Alertmanagers.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusTSDBReloadsFailing
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
|
||||
summary: Prometheus has issues reloading blocks from disk.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
for: 4h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusTSDBCompactionsFailing
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
|
||||
summary: Prometheus has issues compacting blocks.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
for: 4h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusNotIngestingSamples
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples
|
||||
summary: Prometheus is not ingesting samples.
|
||||
expr: |
|
||||
(
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
|
||||
and
|
||||
(
|
||||
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
|
||||
or
|
||||
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
|
||||
)
|
||||
)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusDuplicateTimestamps
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
|
||||
summary: Prometheus is dropping samples with duplicate timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOutOfOrderTimestamps
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
|
||||
summary: Prometheus drops samples with out-of-order timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusRemoteStorageFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures
|
||||
summary: Prometheus fails to send samples to remote storage.
|
||||
expr: |
|
||||
(
|
||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
|
||||
/
|
||||
(
|
||||
(rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
|
||||
+
|
||||
(rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
|
||||
)
|
||||
)
|
||||
* 100
|
||||
> 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteBehind
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind
|
||||
summary: Prometheus remote write is behind.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
- ignoring(remote_name, url) group_right
|
||||
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
> 120
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteDesiredShards
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
|
||||
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
(
|
||||
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
>
|
||||
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusRuleFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
|
||||
summary: Prometheus is failing rule evaluations.
|
||||
expr: |
|
||||
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PrometheusMissingRuleEvaluations
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
|
||||
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
|
||||
expr: |
|
||||
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusTargetLimitHit
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
|
||||
summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusLabelLimitHit
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
|
||||
summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusScrapeBodySizeLimitHit
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
|
||||
summary: Prometheus has dropped some targets that exceeded body size limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusScrapeSampleLimitHit
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
|
||||
summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
|
||||
expr: |
|
||||
increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusTargetSyncFailure
|
||||
annotations:
|
||||
description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
|
||||
summary: Prometheus has failed to sync targets.
|
||||
expr: |
|
||||
increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: PrometheusHighQueryLoad
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload
|
||||
summary: Prometheus is reaching its maximum capacity serving concurrent requests.
|
||||
expr: |
|
||||
avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
min without (alertmanager) (
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
|
||||
)
|
||||
* 100
|
||||
> 3
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
@ -1,6 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s-config
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
|
@ -3,6 +3,12 @@ items:
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: default
|
||||
roleRef:
|
||||
@ -16,6 +22,12 @@ items:
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
@ -29,6 +41,12 @@ items:
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
|
@ -1,6 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s-config
|
||||
namespace: monitoring
|
||||
rules:
|
||||
|
@ -3,6 +3,12 @@ items:
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: default
|
||||
rules:
|
||||
@ -16,9 +22,31 @@ items:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: kube-system
|
||||
rules:
|
||||
@ -32,9 +60,31 @@ items:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
rules:
|
||||
@ -48,4 +98,20 @@ items:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- extensions
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
kind: RoleList
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,7 +2,11 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
prometheus: k8s
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -10,7 +14,12 @@ spec:
|
||||
- name: web
|
||||
port: 9090
|
||||
targetPort: web
|
||||
- name: reloader-web
|
||||
port: 8080
|
||||
targetPort: reloader-web
|
||||
selector:
|
||||
app: prometheus
|
||||
prometheus: k8s
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
sessionAffinity: ClientIP
|
||||
|
@ -1,5 +1,12 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: true
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
|
@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: prometheus
|
||||
name: prometheus
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 2.41.0
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- interval: 30s
|
||||
port: web
|
||||
- interval: 30s
|
||||
port: reloader-web
|
||||
selector:
|
||||
matchLabels:
|
||||
prometheus: k8s
|
||||
app.kubernetes.io/component: prometheus
|
||||
app.kubernetes.io/instance: k8s
|
||||
app.kubernetes.io/name: prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,74 +0,0 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: apiserver
|
||||
name: kube-apiserver
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(debugging|disk|request|server).*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_admission_controller_admission_latencies_seconds_.*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_admission_step_admission_latencies_seconds_.*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- le
|
||||
port: https
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
serverName: kubernetes
|
||||
jobLabel: component
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- default
|
||||
selector:
|
||||
matchLabels:
|
||||
component: apiserver
|
||||
provider: kubernetes
|
@ -1,19 +0,0 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: coredns
|
||||
name: coredns
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
interval: 15s
|
||||
port: metrics
|
||||
jobLabel: k8s-app
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
@ -1,55 +0,0 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-controller-manager
|
||||
name: kube-controller-manager
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(debugging|disk|request|server).*
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: http-metrics
|
||||
jobLabel: k8s-app
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-controller-manager
|
@ -1,18 +0,0 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-scheduler
|
||||
name: kube-scheduler
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- interval: 30s
|
||||
port: http-metrics
|
||||
jobLabel: k8s-app
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-scheduler
|
@ -1,77 +0,0 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kubelet
|
||||
name: kubelet
|
||||
namespace: monitoring
|
||||
spec:
|
||||
endpoints:
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: transformation_(transformation_latencies_microseconds|failures_total)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
- sourceLabels:
|
||||
- __metrics_path__
|
||||
targetLabel: metrics_path
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
honorLabels: true
|
||||
interval: 30s
|
||||
metricRelabelings:
|
||||
- action: drop
|
||||
regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
path: /metrics/cadvisor
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
- sourceLabels:
|
||||
- __metrics_path__
|
||||
targetLabel: metrics_path
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
jobLabel: k8s-app
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- kube-system
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kubelet
|
83
manifests/setup/0namespace-prometheusRule.yaml
Normal file
83
manifests/setup/0namespace-prometheusRule.yaml
Normal file
@ -0,0 +1,83 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: exporter
|
||||
app.kubernetes.io/name: kube-prometheus
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: kube-prometheus-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: general.rules
|
||||
rules:
|
||||
- alert: TargetDown
|
||||
annotations:
|
||||
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
|
||||
summary: One or more targets are unreachable.
|
||||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: Watchdog
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||
and always fire against a receiver. There are integrations with various notification
|
||||
mechanisms that send a notification when this alert is not firing. For example the
|
||||
"DeadMansSnitch" integration in PagerDuty.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
|
||||
summary: An alert that should always be firing to certify that Alertmanager is working properly.
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: none
|
||||
- alert: InfoInhibitor
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert that is used to inhibit info alerts.
|
||||
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
|
||||
other alerts.
|
||||
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
|
||||
severity of 'warning' or 'critical' starts firing on the same namespace.
|
||||
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
|
||||
summary: Info-level alert inhibition.
|
||||
expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
|
||||
labels:
|
||||
severity: none
|
||||
- name: node-network
|
||||
rules:
|
||||
- alert: NodeNetworkInterfaceFlapping
|
||||
annotations:
|
||||
description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping
|
||||
summary: Network interface is often changing its status
|
||||
expr: |
|
||||
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kube-prometheus-node-recording.rules
|
||||
rules:
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)
|
||||
record: instance:node_cpu:rate:sum
|
||||
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_receive_bytes:rate:sum
|
||||
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_transmit_bytes:rate:sum
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
|
||||
record: instance:node_cpu:ratio
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
|
||||
record: cluster:node_cpu:sum_rate5m
|
||||
- expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
|
||||
record: cluster:node_cpu:ratio
|
||||
- name: kube-prometheus-general.rules
|
||||
rules:
|
||||
- expr: count without(instance, pod, node) (up == 1)
|
||||
record: count:up1
|
||||
- expr: count without(instance, pod, node) (up == 0)
|
||||
record: count:up0
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -2,15 +2,19 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.9.2
|
||||
creationTimestamp: null
|
||||
name: podmonitors.monitoring.coreos.com
|
||||
spec:
|
||||
group: monitoring.coreos.com
|
||||
names:
|
||||
categories:
|
||||
- prometheus-operator
|
||||
kind: PodMonitor
|
||||
listKind: PodMonitorList
|
||||
plural: podmonitors
|
||||
shortNames:
|
||||
- pmon
|
||||
singular: podmonitor
|
||||
scope: Namespaced
|
||||
versions:
|
||||
@ -30,9 +34,28 @@ spec:
|
||||
spec:
|
||||
description: Specification of desired Pod selection for target discovery by Prometheus.
|
||||
properties:
|
||||
attachMetadata:
|
||||
description: Attaches node metadata to discovered targets. Requires Prometheus v2.35.0 and above.
|
||||
properties:
|
||||
node:
|
||||
description: When set to true, Prometheus must have permissions to get Nodes.
|
||||
type: boolean
|
||||
type: object
|
||||
jobLabel:
|
||||
description: The label to use to retrieve the job name from.
|
||||
type: string
|
||||
labelLimit:
|
||||
description: Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
labelNameLengthLimit:
|
||||
description: Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
labelValueLengthLimit:
|
||||
description: Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
namespaceSelector:
|
||||
description: Selector to select which namespaces the Endpoints objects are discovered from.
|
||||
properties:
|
||||
@ -40,7 +63,7 @@ spec:
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names.
|
||||
description: List of namespace names to select from.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
@ -50,6 +73,90 @@ spec:
|
||||
items:
|
||||
description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics.
|
||||
properties:
|
||||
authorization:
|
||||
description: Authorization section for this endpoint
|
||||
properties:
|
||||
credentials:
|
||||
description: The secret's key that contains the credentials of the request
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type:
|
||||
description: Set the authentication type. Defaults to Bearer, Basic will cause an error
|
||||
type: string
|
||||
type: object
|
||||
basicAuth:
|
||||
description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
||||
properties:
|
||||
password:
|
||||
description: The secret in the service monitor namespace that contains the password for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
username:
|
||||
description: The secret in the service monitor namespace that contains the username for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
bearerTokenSecret:
|
||||
description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
enableHttp2:
|
||||
description: Whether to enable HTTP2.
|
||||
type: boolean
|
||||
filterRunning:
|
||||
description: 'Drop pods that are not running. (Failed, Succeeded). Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase'
|
||||
type: boolean
|
||||
followRedirects:
|
||||
description: FollowRedirects configures whether scrape requests follow HTTP 3xx redirects.
|
||||
type: boolean
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||
type: boolean
|
||||
@ -57,7 +164,8 @@ spec:
|
||||
description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||
type: boolean
|
||||
interval:
|
||||
description: Interval at which metrics should be scraped
|
||||
description: Interval at which metrics should be scraped If not specified Prometheus' global scrape interval is used.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
@ -65,7 +173,27 @@ spec:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
@ -83,6 +211,8 @@ spec:
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
@ -90,6 +220,80 @@ spec:
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
oauth2:
|
||||
description: OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
properties:
|
||||
clientId:
|
||||
description: The secret or configmap containing the OAuth2 client id
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
clientSecret:
|
||||
description: The secret containing the OAuth2 client secret
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
endpointParams:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Parameters to append to the token URL
|
||||
type: object
|
||||
scopes:
|
||||
description: OAuth2 scopes used for the token request
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
tokenUrl:
|
||||
description: The URL to fetch the token from
|
||||
minLength: 1
|
||||
type: string
|
||||
required:
|
||||
- clientId
|
||||
- clientSecret
|
||||
- tokenUrl
|
||||
type: object
|
||||
params:
|
||||
additionalProperties:
|
||||
items:
|
||||
@ -98,7 +302,7 @@ spec:
|
||||
description: Optional HTTP URL parameters
|
||||
type: object
|
||||
path:
|
||||
description: HTTP path to scrape for metrics.
|
||||
description: HTTP path to scrape for metrics. If empty, Prometheus uses the default value (e.g. `/metrics`).
|
||||
type: string
|
||||
port:
|
||||
description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort.
|
||||
@ -107,12 +311,32 @@ spec:
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||
type: string
|
||||
relabelings:
|
||||
description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields. The original scrape job''s name is available via the `__tmp_prometheus_job_name` label. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
@ -130,6 +354,8 @@ spec:
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
@ -141,7 +367,8 @@ spec:
|
||||
description: HTTP scheme to use for scraping.
|
||||
type: string
|
||||
scrapeTimeout:
|
||||
description: Timeout after which the scrape is ended
|
||||
description: Timeout after which the scrape is ended If not specified, the Prometheus global scrape interval is used.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
targetPort:
|
||||
anyOf:
|
||||
@ -149,6 +376,104 @@ spec:
|
||||
- type: string
|
||||
description: 'Deprecated: Use ''port'' instead.'
|
||||
x-kubernetes-int-or-string: true
|
||||
tlsConfig:
|
||||
description: TLS configuration to use when scraping the endpoint.
|
||||
properties:
|
||||
ca:
|
||||
description: Certificate authority used when verifying server certificates.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
cert:
|
||||
description: Client certificate to present when doing client-authentication.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
insecureSkipVerify:
|
||||
description: Disable target certificate validation.
|
||||
type: boolean
|
||||
keySecret:
|
||||
description: Secret containing the client key file for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
serverName:
|
||||
description: Used to verify the hostname for the targets.
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
type: array
|
||||
podTargetLabels:
|
||||
@ -190,6 +515,11 @@ spec:
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
targetLimit:
|
||||
description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- podMetricsEndpoints
|
||||
- selector
|
||||
@ -199,9 +529,3 @@ spec:
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
||||
|
@ -0,0 +1,566 @@
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.9.2
|
||||
creationTimestamp: null
|
||||
name: probes.monitoring.coreos.com
|
||||
spec:
|
||||
group: monitoring.coreos.com
|
||||
names:
|
||||
categories:
|
||||
- prometheus-operator
|
||||
kind: Probe
|
||||
listKind: ProbeList
|
||||
plural: probes
|
||||
shortNames:
|
||||
- prb
|
||||
singular: probe
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- name: v1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: Probe defines monitoring for a set of static targets or ingresses.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Specification of desired Ingress selection for target discovery by Prometheus.
|
||||
properties:
|
||||
authorization:
|
||||
description: Authorization section for this endpoint
|
||||
properties:
|
||||
credentials:
|
||||
description: The secret's key that contains the credentials of the request
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type:
|
||||
description: Set the authentication type. Defaults to Bearer, Basic will cause an error
|
||||
type: string
|
||||
type: object
|
||||
basicAuth:
|
||||
description: 'BasicAuth allow an endpoint to authenticate over basic authentication. More info: https://prometheus.io/docs/operating/configuration/#endpoint'
|
||||
properties:
|
||||
password:
|
||||
description: The secret in the service monitor namespace that contains the password for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
username:
|
||||
description: The secret in the service monitor namespace that contains the username for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
bearerTokenSecret:
|
||||
description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the probe and accessible by the Prometheus Operator.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
interval:
|
||||
description: Interval at which targets are probed using the configured prober. If not specified Prometheus' global scrape interval is used.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
jobName:
|
||||
description: The job name assigned to scraped metrics by default.
|
||||
type: string
|
||||
labelLimit:
|
||||
description: Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
labelNameLengthLimit:
|
||||
description: Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
labelValueLengthLimit:
|
||||
description: Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
module:
|
||||
description: 'The module to use for probing specifying how to probe the target. Example module configuring in the blackbox exporter: https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
|
||||
type: string
|
||||
oauth2:
|
||||
description: OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
properties:
|
||||
clientId:
|
||||
description: The secret or configmap containing the OAuth2 client id
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
clientSecret:
|
||||
description: The secret containing the OAuth2 client secret
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
endpointParams:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Parameters to append to the token URL
|
||||
type: object
|
||||
scopes:
|
||||
description: OAuth2 scopes used for the token request
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
tokenUrl:
|
||||
description: The URL to fetch the token from
|
||||
minLength: 1
|
||||
type: string
|
||||
required:
|
||||
- clientId
|
||||
- clientSecret
|
||||
- tokenUrl
|
||||
type: object
|
||||
prober:
|
||||
description: Specification for the prober to use for probing targets. The prober.URL parameter is required. Targets cannot be probed if left empty.
|
||||
properties:
|
||||
path:
|
||||
default: /probe
|
||||
description: Path to collect metrics from. Defaults to `/probe`.
|
||||
type: string
|
||||
proxyUrl:
|
||||
description: Optional ProxyURL.
|
||||
type: string
|
||||
scheme:
|
||||
description: HTTP scheme to use for scraping. Defaults to `http`.
|
||||
type: string
|
||||
url:
|
||||
description: Mandatory URL of the prober.
|
||||
type: string
|
||||
required:
|
||||
- url
|
||||
type: object
|
||||
sampleLimit:
|
||||
description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
scrapeTimeout:
|
||||
description: Timeout for scraping metrics from the Prometheus exporter. If not specified, the Prometheus global scrape interval is used.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
targetLimit:
|
||||
description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
targets:
|
||||
description: Targets defines a set of static or dynamically discovered targets to probe.
|
||||
properties:
|
||||
ingress:
|
||||
description: ingress defines the Ingress objects to probe and the relabeling configuration. If `staticConfig` is also defined, `staticConfig` takes precedence.
|
||||
properties:
|
||||
namespaceSelector:
|
||||
description: From which namespaces to select Ingress objects.
|
||||
properties:
|
||||
any:
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names to select from.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
relabelingConfigs:
|
||||
description: 'RelabelConfigs to apply to the label set of the target before it gets scraped. The original ingress address is available via the `__tmp_prometheus_ingress_address` label. It can be used to customize the probed URL. The original scrape job''s name is available via the `__tmp_prometheus_job_name` label. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
selector:
|
||||
description: Selector to select the Ingress objects.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- key
|
||||
- operator
|
||||
type: object
|
||||
type: array
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
staticConfig:
|
||||
description: 'staticConfig defines the static list of targets to probe and the relabeling configuration. If `ingress` is also defined, `staticConfig` takes precedence. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
|
||||
properties:
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Labels assigned to all metrics scraped from the targets.
|
||||
type: object
|
||||
relabelingConfigs:
|
||||
description: 'RelabelConfigs to apply to the label set of the targets before it gets scraped. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
static:
|
||||
description: The list of hosts to probe.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
tlsConfig:
|
||||
description: TLS configuration to use when scraping the endpoint.
|
||||
properties:
|
||||
ca:
|
||||
description: Certificate authority used when verifying server certificates.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
cert:
|
||||
description: Client certificate to present when doing client-authentication.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
insecureSkipVerify:
|
||||
description: Disable target certificate validation.
|
||||
type: boolean
|
||||
keySecret:
|
||||
description: Secret containing the client key file for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
serverName:
|
||||
description: Used to verify the hostname for the targets.
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
required:
|
||||
- spec
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
File diff suppressed because it is too large
Load Diff
@ -2,22 +2,26 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.9.2
|
||||
creationTimestamp: null
|
||||
name: prometheusrules.monitoring.coreos.com
|
||||
spec:
|
||||
group: monitoring.coreos.com
|
||||
names:
|
||||
categories:
|
||||
- prometheus-operator
|
||||
kind: PrometheusRule
|
||||
listKind: PrometheusRuleList
|
||||
plural: prometheusrules
|
||||
shortNames:
|
||||
- promrule
|
||||
singular: prometheusrule
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- name: v1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: PrometheusRule defines alerting rules for a Prometheus instance
|
||||
description: PrometheusRule defines recording and alerting rules for a Prometheus instance
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
@ -33,36 +37,50 @@ spec:
|
||||
groups:
|
||||
description: Content of Prometheus rule file
|
||||
items:
|
||||
description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||
description: RuleGroup is a list of sequentially evaluated recording and alerting rules.
|
||||
properties:
|
||||
interval:
|
||||
description: Interval determines how often rules in the group are evaluated.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
name:
|
||||
description: Name of the rule group.
|
||||
minLength: 1
|
||||
type: string
|
||||
partial_response_strategy:
|
||||
description: 'PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response'
|
||||
pattern: ^(?i)(abort|warn)?$
|
||||
type: string
|
||||
rules:
|
||||
description: List of alerting and recording rules.
|
||||
items:
|
||||
description: Rule describes an alerting or recording rule.
|
||||
description: 'Rule describes an alerting or recording rule See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) rule'
|
||||
properties:
|
||||
alert:
|
||||
description: Name of the alert. Must be a valid label value. Only one of `record` and `alert` must be set.
|
||||
type: string
|
||||
annotations:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Annotations to add to each alert. Only valid for alerting rules.
|
||||
type: object
|
||||
expr:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: PromQL expression to evaluate.
|
||||
x-kubernetes-int-or-string: true
|
||||
for:
|
||||
description: Alerts are considered firing once they have been returned for this long.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Labels to add or overwrite.
|
||||
type: object
|
||||
record:
|
||||
description: Name of the time series to output to. Must be a valid metric name. Only one of `record` and `alert` must be set.
|
||||
type: string
|
||||
required:
|
||||
- expr
|
||||
@ -73,15 +91,12 @@ spec:
|
||||
- rules
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- name
|
||||
x-kubernetes-list-type: map
|
||||
type: object
|
||||
required:
|
||||
- spec
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
||||
|
@ -2,15 +2,19 @@ apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.2.4
|
||||
controller-gen.kubebuilder.io/version: v0.9.2
|
||||
creationTimestamp: null
|
||||
name: servicemonitors.monitoring.coreos.com
|
||||
spec:
|
||||
group: monitoring.coreos.com
|
||||
names:
|
||||
categories:
|
||||
- prometheus-operator
|
||||
kind: ServiceMonitor
|
||||
listKind: ServiceMonitorList
|
||||
plural: servicemonitors
|
||||
shortNames:
|
||||
- smon
|
||||
singular: servicemonitor
|
||||
scope: Namespaced
|
||||
versions:
|
||||
@ -30,11 +34,41 @@ spec:
|
||||
spec:
|
||||
description: Specification of desired Service selection for target discovery by Prometheus.
|
||||
properties:
|
||||
attachMetadata:
|
||||
description: Attaches node metadata to discovered targets. Requires Prometheus v2.37.0 and above.
|
||||
properties:
|
||||
node:
|
||||
description: When set to true, Prometheus must have permissions to get Nodes.
|
||||
type: boolean
|
||||
type: object
|
||||
endpoints:
|
||||
description: A list of endpoints allowed as part of this ServiceMonitor.
|
||||
items:
|
||||
description: Endpoint defines a scrapeable endpoint serving Prometheus metrics.
|
||||
properties:
|
||||
authorization:
|
||||
description: Authorization section for this endpoint
|
||||
properties:
|
||||
credentials:
|
||||
description: The secret's key that contains the credentials of the request
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type:
|
||||
description: Set the authentication type. Defaults to Bearer, Basic will cause an error
|
||||
type: string
|
||||
type: object
|
||||
basicAuth:
|
||||
description: 'BasicAuth allow an endpoint to authenticate over basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||
properties:
|
||||
@ -53,6 +87,7 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
username:
|
||||
description: The secret in the service monitor namespace that contains the username for authentication.
|
||||
properties:
|
||||
@ -68,6 +103,7 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
bearerTokenFile:
|
||||
description: File to read bearer token for scraping targets.
|
||||
@ -87,6 +123,16 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
enableHttp2:
|
||||
description: Whether to enable HTTP2.
|
||||
type: boolean
|
||||
filterRunning:
|
||||
description: 'Drop pods that are not running. (Failed, Succeeded). Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase'
|
||||
type: boolean
|
||||
followRedirects:
|
||||
description: FollowRedirects configures whether scrape requests follow HTTP 3xx redirects.
|
||||
type: boolean
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||
type: boolean
|
||||
@ -94,7 +140,8 @@ spec:
|
||||
description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||
type: boolean
|
||||
interval:
|
||||
description: Interval at which metrics should be scraped
|
||||
description: Interval at which metrics should be scraped If not specified Prometheus' global scrape interval is used.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
@ -102,7 +149,27 @@ spec:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
@ -120,6 +187,8 @@ spec:
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
@ -127,6 +196,80 @@ spec:
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
oauth2:
|
||||
description: OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
properties:
|
||||
clientId:
|
||||
description: The secret or configmap containing the OAuth2 client id
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
clientSecret:
|
||||
description: The secret containing the OAuth2 client secret
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
endpointParams:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Parameters to append to the token URL
|
||||
type: object
|
||||
scopes:
|
||||
description: OAuth2 scopes used for the token request
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
tokenUrl:
|
||||
description: The URL to fetch the token from
|
||||
minLength: 1
|
||||
type: string
|
||||
required:
|
||||
- clientId
|
||||
- clientSecret
|
||||
- tokenUrl
|
||||
type: object
|
||||
params:
|
||||
additionalProperties:
|
||||
items:
|
||||
@ -135,7 +278,7 @@ spec:
|
||||
description: Optional HTTP URL parameters
|
||||
type: object
|
||||
path:
|
||||
description: HTTP path to scrape for metrics.
|
||||
description: HTTP path to scrape for metrics. If empty, Prometheus uses the default value (e.g. `/metrics`).
|
||||
type: string
|
||||
port:
|
||||
description: Name of the service port this endpoint refers to. Mutually exclusive with targetPort.
|
||||
@ -144,12 +287,32 @@ spec:
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||
type: string
|
||||
relabelings:
|
||||
description: 'RelabelConfigs to apply to samples before scraping. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
description: 'RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds relabelings for a few standard Kubernetes fields. The original scrape job''s name is available via the `__tmp_prometheus_job_name` label. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
default: replace
|
||||
description: Action to perform based on regex matching. Default is 'replace'. uppercase and lowercase actions require Prometheus >= 2.36.
|
||||
enum:
|
||||
- replace
|
||||
- Replace
|
||||
- keep
|
||||
- Keep
|
||||
- drop
|
||||
- Drop
|
||||
- hashmod
|
||||
- HashMod
|
||||
- labelmap
|
||||
- LabelMap
|
||||
- labeldrop
|
||||
- LabelDrop
|
||||
- labelkeep
|
||||
- LabelKeep
|
||||
- lowercase
|
||||
- Lowercase
|
||||
- uppercase
|
||||
- Uppercase
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
@ -167,6 +330,8 @@ spec:
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
description: LabelName is a valid Prometheus label name which may only contain ASCII letters, numbers, as well as underscores.
|
||||
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
@ -178,19 +343,20 @@ spec:
|
||||
description: HTTP scheme to use for scraping.
|
||||
type: string
|
||||
scrapeTimeout:
|
||||
description: Timeout after which the scrape is ended
|
||||
description: Timeout after which the scrape is ended If not specified, the Prometheus global scrape timeout is used unless it is less than `Interval` in which the latter is used.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
targetPort:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Name or number of the pod port this endpoint refers to. Mutually exclusive with port.
|
||||
description: Name or number of the target port of the Pod behind the Service, the port must be specified with container port property. Mutually exclusive with port.
|
||||
x-kubernetes-int-or-string: true
|
||||
tlsConfig:
|
||||
description: TLS configuration to use when scraping the endpoint
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the targets.
|
||||
description: Certificate authority used when verifying server certificates.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
@ -207,6 +373,7 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
@ -222,12 +389,13 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
caFile:
|
||||
description: Path to the CA cert in the Prometheus container to use for the targets.
|
||||
type: string
|
||||
cert:
|
||||
description: Struct containing the client cert file for the targets.
|
||||
description: Client certificate to present when doing client-authentication.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
@ -244,6 +412,7 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
secret:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
@ -259,6 +428,7 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
type: object
|
||||
certFile:
|
||||
description: Path to the client cert file in the Prometheus container for the targets.
|
||||
@ -284,6 +454,7 @@ spec:
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
serverName:
|
||||
description: Used to verify the hostname for the targets.
|
||||
type: string
|
||||
@ -291,22 +462,34 @@ spec:
|
||||
type: object
|
||||
type: array
|
||||
jobLabel:
|
||||
description: The label to use to retrieve the job name from.
|
||||
description: "JobLabel selects the label from the associated Kubernetes service which will be used as the `job` label for all metrics. \n For example: If in `ServiceMonitor.spec.jobLabel: foo` and in `Service.metadata.labels.foo: bar`, then the `job=\"bar\"` label is added to all metrics. \n If the value of this field is empty or if the label doesn't exist for the given Service, the `job` label of the metrics defaults to the name of the Kubernetes Service."
|
||||
type: string
|
||||
labelLimit:
|
||||
description: Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
labelNameLengthLimit:
|
||||
description: Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
labelValueLengthLimit:
|
||||
description: Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer.
|
||||
format: int64
|
||||
type: integer
|
||||
namespaceSelector:
|
||||
description: Selector to select which namespaces the Endpoints objects are discovered from.
|
||||
description: Selector to select which namespaces the Kubernetes Endpoints objects are discovered from.
|
||||
properties:
|
||||
any:
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names.
|
||||
description: List of namespace names to select from.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
podTargetLabels:
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
|
||||
description: PodTargetLabels transfers labels on the Kubernetes `Pod` onto the created metrics.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
@ -344,11 +527,16 @@ spec:
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
x-kubernetes-map-type: atomic
|
||||
targetLabels:
|
||||
description: TargetLabels transfers labels on the Kubernetes Service onto the target.
|
||||
description: TargetLabels transfers labels from the Kubernetes `Service` onto the created metrics.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLimit:
|
||||
description: TargetLimit defines a limit on the number of scraped targets that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- endpoints
|
||||
- selector
|
||||
@ -358,9 +546,3 @@ spec:
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,8 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
rules:
|
||||
- apiGroups:
|
||||
@ -12,12 +13,15 @@ rules:
|
||||
resources:
|
||||
- alertmanagers
|
||||
- alertmanagers/finalizers
|
||||
- alertmanagerconfigs
|
||||
- prometheuses
|
||||
- prometheuses/finalizers
|
||||
- prometheuses/status
|
||||
- thanosrulers
|
||||
- thanosrulers/finalizers
|
||||
- servicemonitors
|
||||
- podmonitors
|
||||
- probes
|
||||
- prometheusrules
|
||||
verbs:
|
||||
- '*'
|
||||
@ -67,6 +71,14 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- ingresses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- authentication.k8s.io
|
||||
resources:
|
||||
|
@ -4,7 +4,8 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
|
@ -4,7 +4,8 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -13,20 +14,23 @@ spec:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/default-container: prometheus-operator
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
spec:
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- --kubelet-service=kube-system/kubelet
|
||||
- --logtostderr=true
|
||||
- --config-reloader-image=carlosedp/configmap-reload:latest
|
||||
- --prometheus-config-reloader=carlosedp/prometheus-config-reloader:v0.40.0
|
||||
image: carlosedp/prometheus-operator:v0.40.0
|
||||
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.61.1
|
||||
image: quay.io/prometheus-operator/prometheus-operator:v0.61.1
|
||||
name: prometheus-operator
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
@ -40,20 +44,38 @@ spec:
|
||||
memory: 100Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
- args:
|
||||
- --logtostderr
|
||||
- --secure-listen-address=:8443
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
|
||||
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
|
||||
- --upstream=http://127.0.0.1:8080/
|
||||
image: carlosedp/kube-rbac-proxy:v0.5.0
|
||||
image: quay.io/brancz/kube-rbac-proxy:v0.14.0
|
||||
name: kube-rbac-proxy
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: https
|
||||
resources:
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
securityContext:
|
||||
runAsUser: 65534
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsGroup: 65532
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65532
|
||||
nodeSelector:
|
||||
beta.kubernetes.io/os: linux
|
||||
kubernetes.io/os: linux
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
|
29
manifests/setup/prometheus-operator-networkPolicy.yaml
Normal file
29
manifests/setup/prometheus-operator-networkPolicy.yaml
Normal file
@ -0,0 +1,29 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
egress:
|
||||
- {}
|
||||
ingress:
|
||||
- from:
|
||||
- podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: prometheus
|
||||
ports:
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
policyTypes:
|
||||
- Egress
|
||||
- Ingress
|
100
manifests/setup/prometheus-operator-prometheusRule.yaml
Normal file
100
manifests/setup/prometheus-operator-prometheusRule.yaml
Normal file
@ -0,0 +1,100 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
prometheus: k8s
|
||||
role: alert-rules
|
||||
name: prometheus-operator-rules
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: prometheus-operator
|
||||
rules:
|
||||
- alert: PrometheusOperatorListErrors
|
||||
annotations:
|
||||
description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors
|
||||
summary: Errors while performing list operations in controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorWatchErrors
|
||||
annotations:
|
||||
description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
|
||||
summary: Errors while performing watch operations in controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[5m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorSyncFailed
|
||||
annotations:
|
||||
description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorsyncfailed
|
||||
summary: Last controller reconciliation failed
|
||||
expr: |
|
||||
min_over_time(prometheus_operator_syncs{status="failed",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorReconcileErrors
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorreconcileerrors
|
||||
summary: Errors while reconciling controller.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorNodeLookupErrors
|
||||
annotations:
|
||||
description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornodelookuperrors
|
||||
summary: Errors while reconciling Prometheus.
|
||||
expr: |
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorNotReady
|
||||
annotations:
|
||||
description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready
|
||||
summary: Prometheus operator not ready
|
||||
expr: |
|
||||
min by (controller,namespace) (max_over_time(prometheus_operator_ready{job="prometheus-operator",namespace="monitoring"}[5m]) == 0)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorRejectedResources
|
||||
annotations:
|
||||
description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorrejectedresources
|
||||
summary: Resources rejected by Prometheus operator
|
||||
expr: |
|
||||
min_over_time(prometheus_operator_managed_resources{state="rejected",job="prometheus-operator",namespace="monitoring"}[5m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: config-reloaders
|
||||
rules:
|
||||
- alert: ConfigReloaderSidecarErrors
|
||||
annotations:
|
||||
description: |-
|
||||
Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
|
||||
As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
|
||||
summary: config-reloader sidecar has not had a successful reload for 10m
|
||||
expr: |
|
||||
max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
@ -4,7 +4,8 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -16,3 +17,4 @@ spec:
|
||||
selector:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
|
@ -1,9 +1,11 @@
|
||||
apiVersion: v1
|
||||
automountServiceAccountToken: false
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: controller
|
||||
app.kubernetes.io/name: prometheus-operator
|
||||
app.kubernetes.io/version: v0.40.0
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.61.1
|
||||
name: prometheus-operator
|
||||
namespace: monitoring
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
@ -1,6 +1,6 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local vars = import '../vars.jsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
@ -18,9 +18,9 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
local nginxPort = servicePort.newNamed('prometheus', 10254, 10254);
|
||||
|
||||
service.new('ingress-nginx-metrics', {'app.kubernetes.io/name': 'ingress-nginx'}, nginxPort) +
|
||||
service.new('ingress-nginx-metrics', { 'app.kubernetes.io/name': 'ingress-nginx' }, nginxPort) +
|
||||
service.mixin.metadata.withNamespace('ingress-nginx') +
|
||||
service.mixin.metadata.withLabels({'app.kubernetes.io/name': 'ingress-nginx'}) +
|
||||
service.mixin.metadata.withLabels({ 'app.kubernetes.io/name': 'ingress-nginx' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
|
||||
clusterRole:
|
||||
|
@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
@ -7,7 +7,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
replicas: 1,
|
||||
|
||||
imageRepos+:: {
|
||||
speedtestExporter: 'ghcr.io/miguelndecarvalho/speedtest-exporter',
|
||||
speedtestExporter: 'ghcr.io/miguelndecarvalho/speedtest-exporter',
|
||||
},
|
||||
|
||||
// Add custom dashboards
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
@ -1,5 +1,5 @@
|
||||
local utils = import '../utils.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user