mirror of
https://github.com/carlosedp/cluster-monitoring.git
synced 2024-12-18 19:05:44 +01:00
Fix namespace definition reported in #144. Rebuild manifests.
This commit is contained in:
parent
514aa37f9a
commit
46176b4671
@ -4,7 +4,7 @@ local vars = import 'vars.jsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
namespace: vars._config.namespace,
|
||||
|
||||
urls+:: {
|
||||
domains: [vars.suffixDomain] + vars.additionalDomains,
|
||||
|
@ -708,24 +708,19 @@ spec:
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- name: kube-prometheus-node-recording.rules
|
||||
rules:
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
|
||||
(instance)
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
|
||||
record: instance:node_cpu:rate:sum
|
||||
- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}))
|
||||
BY (instance)
|
||||
- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
|
||||
record: instance:node_filesystem_usage:sum
|
||||
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_receive_bytes:rate:sum
|
||||
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_transmit_bytes:rate:sum
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT
|
||||
(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu)) BY (instance)
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
|
||||
record: instance:node_cpu:ratio
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
|
||||
record: cluster:node_cpu:sum_rate5m
|
||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu))
|
||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
|
||||
record: cluster:node_cpu:ratio
|
||||
- name: kube-prometheus-general.rules
|
||||
rules:
|
||||
@ -737,9 +732,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeStateMetricsListErrors
|
||||
annotations:
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in
|
||||
list operations. This is likely causing it to not be able to expose metrics
|
||||
about Kubernetes objects correctly or at all.
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
@ -751,9 +744,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsWatchErrors
|
||||
annotations:
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in
|
||||
watch operations. This is likely causing it to not be able to expose metrics
|
||||
about Kubernetes objects correctly or at all.
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
@ -767,9 +758,7 @@ spec:
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |
|
||||
@ -785,9 +774,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up fast.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
@ -803,8 +790,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |
|
||||
@ -818,8 +804,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |
|
||||
@ -833,9 +818,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
@ -851,9 +834,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up fast.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
@ -869,8 +850,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |
|
||||
@ -884,8 +864,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |
|
||||
@ -899,8 +878,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: |
|
||||
@ -910,8 +888,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: |
|
||||
@ -939,8 +916,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||
Ensure NTP is configured correctly on this host.
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |
|
||||
@ -960,8 +936,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeClockNotSynchronising
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
|
||||
is configured on this host.
|
||||
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |
|
||||
@ -973,8 +948,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||
expr: |
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
|
||||
@ -983,8 +957,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||
state for longer than 15 minutes.
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||
expr: |
|
||||
sum by (namespace, pod) (
|
||||
@ -999,9 +972,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} does not match, this indicates that the Deployment has failed but has
|
||||
not been rolled back.
|
||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
|
||||
expr: |
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
||||
@ -1012,8 +983,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
||||
matched the expected number of replicas for longer than 15 minutes.
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
|
||||
expr: |
|
||||
(
|
||||
@ -1030,8 +1000,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
|
||||
expr: |
|
||||
(
|
||||
@ -1048,9 +1017,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||
}} does not match, this indicates that the StatefulSet has failed but has
|
||||
not been rolled back.
|
||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
||||
@ -1061,8 +1028,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||
has not been rolled out.
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
|
||||
expr: |
|
||||
(
|
||||
@ -1087,8 +1053,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
||||
{{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
|
||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
|
||||
expr: |
|
||||
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
||||
@ -1099,8 +1064,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||
has been in waiting state for longer than 1 hour.
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
expr: |
|
||||
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
@ -1109,8 +1073,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are not scheduled.'
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
||||
@ -1121,8 +1084,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are running where they are not supposed to run.'
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
@ -1131,8 +1093,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeCronJobRunning
|
||||
annotations:
|
||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more
|
||||
than 1h to complete.
|
||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
|
||||
expr: |
|
||||
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
|
||||
@ -1141,8 +1102,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeJobCompletion
|
||||
annotations:
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
||||
than one hour to complete.
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
||||
expr: |
|
||||
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||
@ -1160,8 +1120,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaReplicasMismatch
|
||||
annotations:
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the
|
||||
desired number of replicas for longer than 15 minutes.
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
|
||||
expr: |
|
||||
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
|
||||
@ -1174,8 +1133,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaMaxedOut
|
||||
annotations:
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at
|
||||
max replicas for longer than 15 minutes.
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
|
||||
expr: |
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
@ -1188,8 +1146,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted CPU resource requests for Pods and cannot
|
||||
tolerate node failure.
|
||||
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||
expr: |
|
||||
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
|
||||
@ -1202,8 +1159,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeMemoryOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted memory resource requests for Pods and cannot
|
||||
tolerate node failure.
|
||||
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
|
||||
expr: |
|
||||
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
|
||||
@ -1242,8 +1198,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||
expr: |
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
@ -1255,9 +1210,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
|
||||
$labels.pod }}.'
|
||||
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
|
||||
@ -1271,9 +1224,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
|
||||
}} free.
|
||||
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
expr: |
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
@ -1285,9 +1236,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
|
||||
days. Currently {{ $value | humanizePercentage }} is available.
|
||||
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
expr: |
|
||||
(
|
||||
@ -1302,8 +1251,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeErrors
|
||||
annotations:
|
||||
message: The persistent volume {{ $labels.persistentvolume }} has status {{
|
||||
$labels.phase }}.
|
||||
message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
|
||||
expr: |
|
||||
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
|
||||
@ -1314,8 +1262,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
message: There are {{ $value }} different semantic versions of Kubernetes
|
||||
components running.
|
||||
message: There are {{ $value }} different semantic versions of Kubernetes components running.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
|
||||
expr: |
|
||||
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
|
||||
@ -1324,8 +1271,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||
}}' is experiencing {{ $value | humanizePercentage }} errors.'
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||
expr: |
|
||||
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
|
||||
@ -1393,8 +1339,7 @@ spec:
|
||||
rules:
|
||||
- alert: KubeAPILatencyHigh
|
||||
annotations:
|
||||
message: The API server has an abnormal latency of {{ $value }} seconds for
|
||||
{{ $labels.verb }} {{ $labels.resource }}.
|
||||
message: The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||
expr: |
|
||||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
||||
@ -1417,9 +1362,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
|
||||
}}.
|
||||
message: API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
|
||||
@ -1430,8 +1373,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: A client certificate used to authenticate to the apiserver is expiring
|
||||
in less than 7.0 days.
|
||||
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||
@ -1439,8 +1381,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: A client certificate used to authenticate to the apiserver is expiring
|
||||
in less than 24.0 hours.
|
||||
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||
@ -1448,10 +1389,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: AggregatedAPIErrors
|
||||
annotations:
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has
|
||||
reported errors. The number of errors have increased for it in the past
|
||||
five minutes. High values indicate that the availability of the service
|
||||
changes too often.
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
|
||||
expr: |
|
||||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
|
||||
@ -1459,8 +1397,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: AggregatedAPIDown
|
||||
annotations:
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down.
|
||||
It has not been available at least for the past five minutes.
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
|
||||
expr: |
|
||||
sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
|
||||
@ -1497,8 +1434,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletTooManyPods
|
||||
annotations:
|
||||
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
|
||||
}} of its Pod capacity.
|
||||
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||
expr: |
|
||||
max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95
|
||||
@ -1507,8 +1443,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeNodeReadinessFlapping
|
||||
annotations:
|
||||
message: The readiness status of node {{ $labels.node }} has changed {{ $value
|
||||
}} times in the last 15 minutes.
|
||||
message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
|
||||
expr: |
|
||||
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
|
||||
@ -1517,8 +1452,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletPlegDurationHigh
|
||||
annotations:
|
||||
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
|
||||
of {{ $value }} seconds on node {{ $labels.node }}.
|
||||
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
|
||||
expr: |
|
||||
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
@ -1527,8 +1461,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletPodStartUpLatencyHigh
|
||||
annotations:
|
||||
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
|
||||
on node {{ $labels.node }}.
|
||||
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
|
||||
expr: |
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
|
||||
@ -1570,8 +1503,7 @@ spec:
|
||||
rules:
|
||||
- alert: PrometheusBadConfig
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
||||
reload its configuration.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
|
||||
summary: Failed Prometheus configuration reload.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
@ -1582,10 +1514,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusNotificationQueueRunningFull
|
||||
annotations:
|
||||
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
|
||||
is running full.
|
||||
summary: Prometheus alert notification queue predicted to run full in less
|
||||
than 30m.
|
||||
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
|
||||
summary: Prometheus alert notification queue predicted to run full in less than 30m.
|
||||
expr: |
|
||||
# Without min_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
@ -1599,10 +1529,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% errors while sending alerts from
|
||||
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
||||
summary: Prometheus has encountered more than 1% errors sending alerts to
|
||||
a specific Alertmanager.
|
||||
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
||||
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
|
||||
expr: |
|
||||
(
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
@ -1616,8 +1544,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
|
||||
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
min without(alertmanager) (
|
||||
@ -1632,8 +1559,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusNotConnectedToAlertmanagers
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
|
||||
to any Alertmanagers.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
|
||||
summary: Prometheus is not connected to any Alertmanagers.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
@ -1644,8 +1570,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusTSDBReloadsFailing
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
||||
{{$value | humanize}} reload failures over the last 3h.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
|
||||
summary: Prometheus has issues reloading blocks from disk.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
@ -1654,8 +1579,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusTSDBCompactionsFailing
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
||||
{{$value | humanize}} compaction failures over the last 3h.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
|
||||
summary: Prometheus has issues compacting blocks.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
@ -1664,8 +1588,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusNotIngestingSamples
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
|
||||
samples.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
|
||||
summary: Prometheus is not ingesting samples.
|
||||
expr: |
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
|
||||
@ -1674,9 +1597,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusDuplicateTimestamps
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
||||
{{ printf "%.4g" $value }} samples/s with different values but duplicated
|
||||
timestamp.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
|
||||
summary: Prometheus is dropping samples with duplicate timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1685,8 +1606,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusOutOfOrderTimestamps
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
||||
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
||||
summary: Prometheus drops samples with out-of-order timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1695,9 +1615,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusRemoteStorageFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
|
||||
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
|
||||
$labels.url }}
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
|
||||
summary: Prometheus fails to send samples to remote storage.
|
||||
expr: |
|
||||
(
|
||||
@ -1716,9 +1634,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteBehind
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
|
||||
}}.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
|
||||
summary: Prometheus remote write is behind.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
@ -1734,13 +1650,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteDesiredShards
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||
desired shards calculation wants to run {{ $value }} shards for queue {{
|
||||
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
|
||||
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
|
||||
$labels.instance | query | first | value }}.
|
||||
summary: Prometheus remote write desired shards calculation wants to run more
|
||||
than configured max shards.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
|
||||
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
@ -1754,8 +1665,7 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusRuleFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
||||
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
||||
summary: Prometheus is failing rule evaluations.
|
||||
expr: |
|
||||
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1764,8 +1674,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusMissingRuleEvaluations
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
|
||||
printf "%.0f" $value }} rule group evaluations in the last 5m.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
|
||||
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
|
||||
expr: |
|
||||
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1776,8 +1685,7 @@ spec:
|
||||
rules:
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
annotations:
|
||||
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
|
||||
are out of sync.
|
||||
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
|
||||
expr: |
|
||||
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
|
||||
for: 5m
|
||||
@ -1785,8 +1693,7 @@ spec:
|
||||
severity: critical
|
||||
- alert: AlertmanagerFailedReload
|
||||
annotations:
|
||||
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
||||
}}/{{ $labels.pod}}.
|
||||
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.
|
||||
expr: |
|
||||
alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
|
||||
for: 10m
|
||||
@ -1806,10 +1713,8 @@ spec:
|
||||
rules:
|
||||
- alert: TargetDown
|
||||
annotations:
|
||||
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
|
||||
}} targets in {{ $labels.namespace }} namespace are down.'
|
||||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
|
||||
namespace, service)) > 10
|
||||
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
|
||||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -1828,8 +1733,7 @@ spec:
|
||||
rules:
|
||||
- alert: NodeNetworkInterfaceFlapping
|
||||
annotations:
|
||||
message: Network interface "{{ $labels.device }}" changing it's up status
|
||||
often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
message: Network interface "{{ $labels.device }}" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
expr: |
|
||||
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
|
||||
for: 2m
|
||||
@ -1839,8 +1743,7 @@ spec:
|
||||
rules:
|
||||
- alert: PrometheusOperatorReconcileErrors
|
||||
annotations:
|
||||
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
|
||||
}} Namespace.
|
||||
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.
|
||||
expr: |
|
||||
rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,31 +20,24 @@ spec:
|
||||
description: PodMonitor defines monitoring for a set of pods.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Specification of desired Pod selection for target discovery
|
||||
by Prometheus.
|
||||
description: Specification of desired Pod selection for target discovery by Prometheus.
|
||||
properties:
|
||||
jobLabel:
|
||||
description: The label to use to retrieve the job name from.
|
||||
type: string
|
||||
namespaceSelector:
|
||||
description: Selector to select which namespaces the Endpoints objects
|
||||
are discovered from.
|
||||
description: Selector to select which namespaces the Endpoints objects are discovered from.
|
||||
properties:
|
||||
any:
|
||||
description: Boolean describing whether all namespaces are selected
|
||||
in contrast to a list restricting them.
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names.
|
||||
@ -55,63 +48,45 @@ spec:
|
||||
podMetricsEndpoints:
|
||||
description: A list of endpoints allowed as part of this PodMonitor.
|
||||
items:
|
||||
description: PodMetricsEndpoint defines a scrapeable endpoint of
|
||||
a Kubernetes Pod serving Prometheus metrics.
|
||||
description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics.
|
||||
properties:
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions
|
||||
with target labels.
|
||||
description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||
type: boolean
|
||||
honorTimestamps:
|
||||
description: HonorTimestamps controls whether Prometheus respects
|
||||
the timestamps present in scraped data.
|
||||
description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||
type: boolean
|
||||
interval:
|
||||
description: Interval at which metrics should be scraped
|
||||
type: string
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before
|
||||
ingestion.
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -126,56 +101,39 @@ spec:
|
||||
description: HTTP path to scrape for metrics.
|
||||
type: string
|
||||
port:
|
||||
description: Name of the pod port this endpoint refers to. Mutually
|
||||
exclusive with targetPort.
|
||||
description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort.
|
||||
type: string
|
||||
proxyUrl:
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
||||
to proxy through this endpoint.
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||
type: string
|
||||
relabelings:
|
||||
description: 'RelabelConfigs to apply to samples before ingestion.
|
||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -194,42 +152,30 @@ spec:
|
||||
type: object
|
||||
type: array
|
||||
podTargetLabels:
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod
|
||||
onto the target.
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
sampleLimit:
|
||||
description: SampleLimit defines per-scrape limit on number of scraped
|
||||
samples that will be accepted.
|
||||
description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
selector:
|
||||
description: Selector to select Pod objects.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements.
|
||||
The requirements are ANDed.
|
||||
description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that
|
||||
contains values, a key, and an operator that relates the key
|
||||
and values.
|
||||
description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies
|
||||
to.
|
||||
description: key is the label key that the selector applies to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to
|
||||
a set of values. Valid operators are In, NotIn, Exists
|
||||
and DoesNotExist.
|
||||
description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the
|
||||
operator is In or NotIn, the values array must be non-empty.
|
||||
If the operator is Exists or DoesNotExist, the values
|
||||
array must be empty. This array is replaced during a strategic
|
||||
merge patch.
|
||||
description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
@ -241,11 +187,7 @@ spec:
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single
|
||||
{key,value} in the matchLabels map is equivalent to an element
|
||||
of matchExpressions, whose key field is "key", the operator
|
||||
is "In", and the values array contains only "value". The requirements
|
||||
are ANDed.
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
required:
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,14 +20,10 @@ spec:
|
||||
description: PrometheusRule defines alerting rules for a Prometheus instance
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
@ -37,10 +33,7 @@ spec:
|
||||
groups:
|
||||
description: Content of Prometheus rule file
|
||||
items:
|
||||
description: 'RuleGroup is a list of sequentially evaluated recording
|
||||
and alerting rules. Note: PartialResponseStrategy is only used
|
||||
by ThanosRuler and will be ignored by Prometheus instances. Valid
|
||||
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||
description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||
properties:
|
||||
interval:
|
||||
type: string
|
||||
|
@ -20,65 +20,50 @@ spec:
|
||||
description: ServiceMonitor defines monitoring for a set of services.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Specification of desired Service selection for target discovery
|
||||
by Prometheus.
|
||||
description: Specification of desired Service selection for target discovery by Prometheus.
|
||||
properties:
|
||||
endpoints:
|
||||
description: A list of endpoints allowed as part of this ServiceMonitor.
|
||||
items:
|
||||
description: Endpoint defines a scrapeable endpoint serving Prometheus
|
||||
metrics.
|
||||
description: Endpoint defines a scrapeable endpoint serving Prometheus metrics.
|
||||
properties:
|
||||
basicAuth:
|
||||
description: 'BasicAuth allow an endpoint to authenticate over
|
||||
basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||
description: 'BasicAuth allow an endpoint to authenticate over basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||
properties:
|
||||
password:
|
||||
description: The secret in the service monitor namespace
|
||||
that contains the password for authentication.
|
||||
description: The secret in the service monitor namespace that contains the password for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
username:
|
||||
description: The secret in the service monitor namespace
|
||||
that contains the username for authentication.
|
||||
description: The secret in the service monitor namespace that contains the username for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -88,79 +73,57 @@ spec:
|
||||
description: File to read bearer token for scraping targets.
|
||||
type: string
|
||||
bearerTokenSecret:
|
||||
description: Secret to mount to read bearer token for scraping
|
||||
targets. The secret needs to be in the same namespace as the
|
||||
service monitor and accessible by the Prometheus Operator.
|
||||
description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the service monitor and accessible by the Prometheus Operator.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions
|
||||
with target labels.
|
||||
description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||
type: boolean
|
||||
honorTimestamps:
|
||||
description: HonorTimestamps controls whether Prometheus respects
|
||||
the timestamps present in scraped data.
|
||||
description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||
type: boolean
|
||||
interval:
|
||||
description: Interval at which metrics should be scraped
|
||||
type: string
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before
|
||||
ingestion.
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -175,56 +138,39 @@ spec:
|
||||
description: HTTP path to scrape for metrics.
|
||||
type: string
|
||||
port:
|
||||
description: Name of the service port this endpoint refers to.
|
||||
Mutually exclusive with targetPort.
|
||||
description: Name of the service port this endpoint refers to. Mutually exclusive with targetPort.
|
||||
type: string
|
||||
proxyUrl:
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
||||
to proxy through this endpoint.
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||
type: string
|
||||
relabelings:
|
||||
description: 'RelabelConfigs to apply to samples before scraping.
|
||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
description: 'RelabelConfigs to apply to samples before scraping. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -238,31 +184,25 @@ spec:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Name or number of the pod port this endpoint refers
|
||||
to. Mutually exclusive with port.
|
||||
description: Name or number of the pod port this endpoint refers to. Mutually exclusive with port.
|
||||
x-kubernetes-int-or-string: true
|
||||
tlsConfig:
|
||||
description: TLS configuration to use when scraping the endpoint
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the
|
||||
targets.
|
||||
description: Stuct containing the CA cert to use for the targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the
|
||||
targets.
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its
|
||||
key must be defined
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -271,45 +211,35 @@ spec:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key
|
||||
must be defined
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
caFile:
|
||||
description: Path to the CA cert in the Prometheus container
|
||||
to use for the targets.
|
||||
description: Path to the CA cert in the Prometheus container to use for the targets.
|
||||
type: string
|
||||
cert:
|
||||
description: Struct containing the client cert file for
|
||||
the targets.
|
||||
description: Struct containing the client cert file for the targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the
|
||||
targets.
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its
|
||||
key must be defined
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -318,48 +248,38 @@ spec:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key
|
||||
must be defined
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
certFile:
|
||||
description: Path to the client cert file in the Prometheus
|
||||
container for the targets.
|
||||
description: Path to the client cert file in the Prometheus container for the targets.
|
||||
type: string
|
||||
insecureSkipVerify:
|
||||
description: Disable target certificate validation.
|
||||
type: boolean
|
||||
keyFile:
|
||||
description: Path to the client key file in the Prometheus
|
||||
container for the targets.
|
||||
description: Path to the client key file in the Prometheus container for the targets.
|
||||
type: string
|
||||
keySecret:
|
||||
description: Secret containing the client key file for the
|
||||
targets.
|
||||
description: Secret containing the client key file for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -374,12 +294,10 @@ spec:
|
||||
description: The label to use to retrieve the job name from.
|
||||
type: string
|
||||
namespaceSelector:
|
||||
description: Selector to select which namespaces the Endpoints objects
|
||||
are discovered from.
|
||||
description: Selector to select which namespaces the Endpoints objects are discovered from.
|
||||
properties:
|
||||
any:
|
||||
description: Boolean describing whether all namespaces are selected
|
||||
in contrast to a list restricting them.
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names.
|
||||
@ -388,42 +306,30 @@ spec:
|
||||
type: array
|
||||
type: object
|
||||
podTargetLabels:
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod
|
||||
onto the target.
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
sampleLimit:
|
||||
description: SampleLimit defines per-scrape limit on number of scraped
|
||||
samples that will be accepted.
|
||||
description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
selector:
|
||||
description: Selector to select Endpoints objects.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements.
|
||||
The requirements are ANDed.
|
||||
description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that
|
||||
contains values, a key, and an operator that relates the key
|
||||
and values.
|
||||
description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies
|
||||
to.
|
||||
description: key is the label key that the selector applies to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to
|
||||
a set of values. Valid operators are In, NotIn, Exists
|
||||
and DoesNotExist.
|
||||
description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the
|
||||
operator is In or NotIn, the values array must be non-empty.
|
||||
If the operator is Exists or DoesNotExist, the values
|
||||
array must be empty. This array is replaced during a strategic
|
||||
merge patch.
|
||||
description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
@ -435,16 +341,11 @@ spec:
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single
|
||||
{key,value} in the matchLabels map is equivalent to an element
|
||||
of matchExpressions, whose key field is "key", the operator
|
||||
is "In", and the values array contains only "value". The requirements
|
||||
are ANDed.
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
targetLabels:
|
||||
description: TargetLabels transfers labels on the Kubernetes Service
|
||||
onto the target.
|
||||
description: TargetLabels transfers labels on the Kubernetes Service onto the target.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user