mirror of
https://github.com/carlosedp/cluster-monitoring.git
synced 2024-11-20 19:07:17 +01:00
Merge pull request #117 from ToMe25/ingress_improvements
This commit is contained in:
commit
e5a8f1ab52
@ -18258,7 +18258,7 @@ items:
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
|
||||
"expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -18342,7 +18342,7 @@ items:
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
|
||||
"expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
|
@ -708,19 +708,24 @@ spec:
|
||||
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
|
||||
- name: kube-prometheus-node-recording.rules
|
||||
rules:
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
|
||||
(instance)
|
||||
record: instance:node_cpu:rate:sum
|
||||
- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
|
||||
- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}))
|
||||
BY (instance)
|
||||
record: instance:node_filesystem_usage:sum
|
||||
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_receive_bytes:rate:sum
|
||||
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
|
||||
record: instance:node_network_transmit_bytes:rate:sum
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT
|
||||
(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu)) BY (instance)
|
||||
record: instance:node_cpu:ratio
|
||||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
|
||||
record: cluster:node_cpu:sum_rate5m
|
||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
|
||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu))
|
||||
record: cluster:node_cpu:ratio
|
||||
- name: kube-prometheus-general.rules
|
||||
rules:
|
||||
@ -732,7 +737,9 @@ spec:
|
||||
rules:
|
||||
- alert: KubeStateMetricsListErrors
|
||||
annotations:
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in
|
||||
list operations. This is likely causing it to not be able to expose metrics
|
||||
about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
@ -744,7 +751,9 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeStateMetricsWatchErrors
|
||||
annotations:
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
|
||||
message: kube-state-metrics is experiencing errors at an elevated rate in
|
||||
watch operations. This is likely causing it to not be able to expose metrics
|
||||
about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
|
||||
expr: |
|
||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
@ -758,7 +767,9 @@ spec:
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |
|
||||
@ -774,7 +785,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
@ -790,7 +803,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |
|
||||
@ -804,7 +818,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |
|
||||
@ -818,7 +833,9 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
@ -834,7 +851,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
@ -850,7 +869,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |
|
||||
@ -864,7 +884,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |
|
||||
@ -878,7 +899,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: |
|
||||
@ -888,7 +910,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: |
|
||||
@ -916,7 +939,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||
Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |
|
||||
@ -936,7 +960,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: NodeClockNotSynchronising
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
|
||||
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
|
||||
is configured on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |
|
||||
@ -948,7 +973,8 @@ spec:
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||
expr: |
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
|
||||
@ -957,7 +983,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||
state for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||
expr: |
|
||||
sum by (namespace, pod) (
|
||||
@ -972,7 +999,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
|
||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} does not match, this indicates that the Deployment has failed but has
|
||||
not been rolled back.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
|
||||
expr: |
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
||||
@ -983,7 +1012,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
||||
matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
|
||||
expr: |
|
||||
(
|
||||
@ -1000,7 +1030,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
|
||||
expr: |
|
||||
(
|
||||
@ -1017,7 +1048,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
|
||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||
}} does not match, this indicates that the StatefulSet has failed but has
|
||||
not been rolled back.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
|
||||
expr: |
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
||||
@ -1028,7 +1061,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
|
||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||
has not been rolled out.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
|
||||
expr: |
|
||||
(
|
||||
@ -1053,7 +1087,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
|
||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
||||
{{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
|
||||
expr: |
|
||||
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
||||
@ -1064,7 +1099,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||
has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
expr: |
|
||||
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
@ -1073,7 +1109,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are not scheduled.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
||||
@ -1084,7 +1121,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are running where they are not supposed to run.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
|
||||
expr: |
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||
@ -1093,7 +1131,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeCronJobRunning
|
||||
annotations:
|
||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.
|
||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more
|
||||
than 1h to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
|
||||
expr: |
|
||||
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
|
||||
@ -1102,7 +1141,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeJobCompletion
|
||||
annotations:
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete.
|
||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
||||
than one hour to complete.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
||||
expr: |
|
||||
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||
@ -1120,7 +1160,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaReplicasMismatch
|
||||
annotations:
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the
|
||||
desired number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
|
||||
expr: |
|
||||
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
|
||||
@ -1133,7 +1174,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeHpaMaxedOut
|
||||
annotations:
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
|
||||
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at
|
||||
max replicas for longer than 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
|
||||
expr: |
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
@ -1146,7 +1188,8 @@ spec:
|
||||
rules:
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
|
||||
message: Cluster has overcommitted CPU resource requests for Pods and cannot
|
||||
tolerate node failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||
expr: |
|
||||
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
|
||||
@ -1159,7 +1202,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeMemoryOvercommit
|
||||
annotations:
|
||||
message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
|
||||
message: Cluster has overcommitted memory resource requests for Pods and cannot
|
||||
tolerate node failure.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
|
||||
expr: |
|
||||
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
|
||||
@ -1198,7 +1242,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
|
||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||
expr: |
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
@ -1210,7 +1255,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
|
||||
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
|
||||
$labels.pod }}.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
|
||||
@ -1224,7 +1271,9 @@ spec:
|
||||
rules:
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
|
||||
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
|
||||
}} free.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
expr: |
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
@ -1236,7 +1285,9 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
|
||||
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
|
||||
days. Currently {{ $value | humanizePercentage }} is available.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
|
||||
expr: |
|
||||
(
|
||||
@ -1251,7 +1302,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeErrors
|
||||
annotations:
|
||||
message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
|
||||
message: The persistent volume {{ $labels.persistentvolume }} has status {{
|
||||
$labels.phase }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
|
||||
expr: |
|
||||
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
|
||||
@ -1262,7 +1314,8 @@ spec:
|
||||
rules:
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
message: There are {{ $value }} different semantic versions of Kubernetes components running.
|
||||
message: There are {{ $value }} different semantic versions of Kubernetes
|
||||
components running.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
|
||||
expr: |
|
||||
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
|
||||
@ -1271,7 +1324,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||
}}' is experiencing {{ $value | humanizePercentage }} errors.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||
expr: |
|
||||
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
|
||||
@ -1339,7 +1393,8 @@ spec:
|
||||
rules:
|
||||
- alert: KubeAPILatencyHigh
|
||||
annotations:
|
||||
message: The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.
|
||||
message: The API server has an abnormal latency of {{ $value }} seconds for
|
||||
{{ $labels.verb }} {{ $labels.resource }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||
expr: |
|
||||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
||||
@ -1362,7 +1417,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
|
||||
}}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
|
||||
@ -1373,7 +1430,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
|
||||
message: A client certificate used to authenticate to the apiserver is expiring
|
||||
in less than 7.0 days.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||
@ -1381,7 +1439,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeClientCertificateExpiration
|
||||
annotations:
|
||||
message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
|
||||
message: A client certificate used to authenticate to the apiserver is expiring
|
||||
in less than 24.0 hours.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||
expr: |
|
||||
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||
@ -1389,7 +1448,10 @@ spec:
|
||||
severity: critical
|
||||
- alert: AggregatedAPIErrors
|
||||
annotations:
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has
|
||||
reported errors. The number of errors have increased for it in the past
|
||||
five minutes. High values indicate that the availability of the service
|
||||
changes too often.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
|
||||
expr: |
|
||||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
|
||||
@ -1397,7 +1459,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: AggregatedAPIDown
|
||||
annotations:
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.
|
||||
message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down.
|
||||
It has not been available at least for the past five minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
|
||||
expr: |
|
||||
sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
|
||||
@ -1434,16 +1497,18 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletTooManyPods
|
||||
annotations:
|
||||
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
|
||||
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
|
||||
}} of its Pod capacity.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||
expr: |
|
||||
max(max(kubelet_running_pods{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95
|
||||
max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeNodeReadinessFlapping
|
||||
annotations:
|
||||
message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
|
||||
message: The readiness status of node {{ $labels.node }} has changed {{ $value
|
||||
}} times in the last 15 minutes.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
|
||||
expr: |
|
||||
sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
|
||||
@ -1452,7 +1517,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletPlegDurationHigh
|
||||
annotations:
|
||||
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
|
||||
message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration
|
||||
of {{ $value }} seconds on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
|
||||
expr: |
|
||||
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
|
||||
@ -1461,7 +1527,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeletPodStartUpLatencyHigh
|
||||
annotations:
|
||||
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
|
||||
message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
|
||||
on node {{ $labels.node }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
|
||||
expr: |
|
||||
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
|
||||
@ -1503,7 +1570,8 @@ spec:
|
||||
rules:
|
||||
- alert: PrometheusBadConfig
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
||||
reload its configuration.
|
||||
summary: Failed Prometheus configuration reload.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
@ -1514,8 +1582,10 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusNotificationQueueRunningFull
|
||||
annotations:
|
||||
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
|
||||
summary: Prometheus alert notification queue predicted to run full in less than 30m.
|
||||
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
|
||||
is running full.
|
||||
summary: Prometheus alert notification queue predicted to run full in less
|
||||
than 30m.
|
||||
expr: |
|
||||
# Without min_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
@ -1529,8 +1599,10 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
||||
summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
|
||||
description: '{{ printf "%.1f" $value }}% errors while sending alerts from
|
||||
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
|
||||
summary: Prometheus has encountered more than 1% errors sending alerts to
|
||||
a specific Alertmanager.
|
||||
expr: |
|
||||
(
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
@ -1544,7 +1616,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
|
||||
annotations:
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
|
||||
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
min without(alertmanager) (
|
||||
@ -1559,7 +1632,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusNotConnectedToAlertmanagers
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
|
||||
to any Alertmanagers.
|
||||
summary: Prometheus is not connected to any Alertmanagers.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
@ -1570,7 +1644,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusTSDBReloadsFailing
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
||||
{{$value | humanize}} reload failures over the last 3h.
|
||||
summary: Prometheus has issues reloading blocks from disk.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
@ -1579,7 +1654,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusTSDBCompactionsFailing
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
|
||||
{{$value | humanize}} compaction failures over the last 3h.
|
||||
summary: Prometheus has issues compacting blocks.
|
||||
expr: |
|
||||
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
|
||||
@ -1588,7 +1664,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusNotIngestingSamples
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
|
||||
samples.
|
||||
summary: Prometheus is not ingesting samples.
|
||||
expr: |
|
||||
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
|
||||
@ -1597,7 +1674,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusDuplicateTimestamps
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
||||
{{ printf "%.4g" $value }} samples/s with different values but duplicated
|
||||
timestamp.
|
||||
summary: Prometheus is dropping samples with duplicate timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1606,7 +1685,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusOutOfOrderTimestamps
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
|
||||
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
|
||||
summary: Prometheus drops samples with out-of-order timestamps.
|
||||
expr: |
|
||||
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1615,7 +1695,9 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusRemoteStorageFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
|
||||
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
|
||||
$labels.url }}
|
||||
summary: Prometheus fails to send samples to remote storage.
|
||||
expr: |
|
||||
(
|
||||
@ -1634,7 +1716,9 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteBehind
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
|
||||
}}.
|
||||
summary: Prometheus remote write is behind.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
@ -1650,8 +1734,13 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusRemoteWriteDesiredShards
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
|
||||
summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
|
||||
desired shards calculation wants to run {{ $value }} shards for queue {{
|
||||
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
|
||||
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
|
||||
$labels.instance | query | first | value }}.
|
||||
summary: Prometheus remote write desired shards calculation wants to run more
|
||||
than configured max shards.
|
||||
expr: |
|
||||
# Without max_over_time, failed scrapes could create false negatives, see
|
||||
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
|
||||
@ -1665,7 +1754,8 @@ spec:
|
||||
severity: warning
|
||||
- alert: PrometheusRuleFailures
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
|
||||
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
|
||||
summary: Prometheus is failing rule evaluations.
|
||||
expr: |
|
||||
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1674,7 +1764,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: PrometheusMissingRuleEvaluations
|
||||
annotations:
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
|
||||
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
|
||||
printf "%.0f" $value }} rule group evaluations in the last 5m.
|
||||
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
|
||||
expr: |
|
||||
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
|
||||
@ -1685,7 +1776,8 @@ spec:
|
||||
rules:
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
annotations:
|
||||
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
|
||||
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
|
||||
are out of sync.
|
||||
expr: |
|
||||
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
|
||||
for: 5m
|
||||
@ -1693,7 +1785,8 @@ spec:
|
||||
severity: critical
|
||||
- alert: AlertmanagerFailedReload
|
||||
annotations:
|
||||
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.
|
||||
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
||||
}}/{{ $labels.pod}}.
|
||||
expr: |
|
||||
alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
|
||||
for: 10m
|
||||
@ -1713,8 +1806,10 @@ spec:
|
||||
rules:
|
||||
- alert: TargetDown
|
||||
annotations:
|
||||
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
|
||||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
|
||||
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
|
||||
}} targets in {{ $labels.namespace }} namespace are down.'
|
||||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
|
||||
namespace, service)) > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -1733,7 +1828,8 @@ spec:
|
||||
rules:
|
||||
- alert: NodeNetworkInterfaceFlapping
|
||||
annotations:
|
||||
message: Network interface "{{ $labels.device }}" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
message: Network interface "{{ $labels.device }}" changing it's up status
|
||||
often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
expr: |
|
||||
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
|
||||
for: 2m
|
||||
@ -1743,7 +1839,8 @@ spec:
|
||||
rules:
|
||||
- alert: PrometheusOperatorReconcileErrors
|
||||
annotations:
|
||||
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.
|
||||
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
|
||||
}} Namespace.
|
||||
expr: |
|
||||
rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
|
||||
for: 10m
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,24 +20,31 @@ spec:
|
||||
description: PodMonitor defines monitoring for a set of pods.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Specification of desired Pod selection for target discovery by Prometheus.
|
||||
description: Specification of desired Pod selection for target discovery
|
||||
by Prometheus.
|
||||
properties:
|
||||
jobLabel:
|
||||
description: The label to use to retrieve the job name from.
|
||||
type: string
|
||||
namespaceSelector:
|
||||
description: Selector to select which namespaces the Endpoints objects are discovered from.
|
||||
description: Selector to select which namespaces the Endpoints objects
|
||||
are discovered from.
|
||||
properties:
|
||||
any:
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
description: Boolean describing whether all namespaces are selected
|
||||
in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names.
|
||||
@ -48,45 +55,63 @@ spec:
|
||||
podMetricsEndpoints:
|
||||
description: A list of endpoints allowed as part of this PodMonitor.
|
||||
items:
|
||||
description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics.
|
||||
description: PodMetricsEndpoint defines a scrapeable endpoint of
|
||||
a Kubernetes Pod serving Prometheus metrics.
|
||||
properties:
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||
description: HonorLabels chooses the metric's labels on collisions
|
||||
with target labels.
|
||||
type: boolean
|
||||
honorTimestamps:
|
||||
description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||
description: HonorTimestamps controls whether Prometheus respects
|
||||
the timestamps present in scraped data.
|
||||
type: boolean
|
||||
interval:
|
||||
description: Interval at which metrics should be scraped
|
||||
type: string
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
description: MetricRelabelConfigs to apply to samples before
|
||||
ingestion.
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -101,39 +126,56 @@ spec:
|
||||
description: HTTP path to scrape for metrics.
|
||||
type: string
|
||||
port:
|
||||
description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort.
|
||||
description: Name of the pod port this endpoint refers to. Mutually
|
||||
exclusive with targetPort.
|
||||
type: string
|
||||
proxyUrl:
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
||||
to proxy through this endpoint.
|
||||
type: string
|
||||
relabelings:
|
||||
description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
description: 'RelabelConfigs to apply to samples before ingestion.
|
||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -152,30 +194,42 @@ spec:
|
||||
type: object
|
||||
type: array
|
||||
podTargetLabels:
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod
|
||||
onto the target.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
sampleLimit:
|
||||
description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
|
||||
description: SampleLimit defines per-scrape limit on number of scraped
|
||||
samples that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
selector:
|
||||
description: Selector to select Pod objects.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||
description: matchExpressions is a list of label selector requirements.
|
||||
The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||
description: A label selector requirement is a selector that
|
||||
contains values, a key, and an operator that relates the key
|
||||
and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies to.
|
||||
description: key is the label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||
description: operator represents a key's relationship to
|
||||
a set of values. Valid operators are In, NotIn, Exists
|
||||
and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||
description: values is an array of string values. If the
|
||||
operator is In or NotIn, the values array must be non-empty.
|
||||
If the operator is Exists or DoesNotExist, the values
|
||||
array must be empty. This array is replaced during a strategic
|
||||
merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
@ -187,7 +241,11 @@ spec:
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
description: matchLabels is a map of {key,value} pairs. A single
|
||||
{key,value} in the matchLabels map is equivalent to an element
|
||||
of matchExpressions, whose key field is "key", the operator
|
||||
is "In", and the values array contains only "value". The requirements
|
||||
are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
required:
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,10 +20,14 @@ spec:
|
||||
description: PrometheusRule defines alerting rules for a Prometheus instance
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
@ -33,7 +37,10 @@ spec:
|
||||
groups:
|
||||
description: Content of Prometheus rule file
|
||||
items:
|
||||
description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||
description: 'RuleGroup is a list of sequentially evaluated recording
|
||||
and alerting rules. Note: PartialResponseStrategy is only used
|
||||
by ThanosRuler and will be ignored by Prometheus instances. Valid
|
||||
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
|
||||
properties:
|
||||
interval:
|
||||
type: string
|
||||
|
@ -20,50 +20,65 @@ spec:
|
||||
description: ServiceMonitor defines monitoring for a set of services.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: Specification of desired Service selection for target discovery by Prometheus.
|
||||
description: Specification of desired Service selection for target discovery
|
||||
by Prometheus.
|
||||
properties:
|
||||
endpoints:
|
||||
description: A list of endpoints allowed as part of this ServiceMonitor.
|
||||
items:
|
||||
description: Endpoint defines a scrapeable endpoint serving Prometheus metrics.
|
||||
description: Endpoint defines a scrapeable endpoint serving Prometheus
|
||||
metrics.
|
||||
properties:
|
||||
basicAuth:
|
||||
description: 'BasicAuth allow an endpoint to authenticate over basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||
description: 'BasicAuth allow an endpoint to authenticate over
|
||||
basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
|
||||
properties:
|
||||
password:
|
||||
description: The secret in the service monitor namespace that contains the password for authentication.
|
||||
description: The secret in the service monitor namespace
|
||||
that contains the password for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
username:
|
||||
description: The secret in the service monitor namespace that contains the username for authentication.
|
||||
description: The secret in the service monitor namespace
|
||||
that contains the username for authentication.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -73,57 +88,79 @@ spec:
|
||||
description: File to read bearer token for scraping targets.
|
||||
type: string
|
||||
bearerTokenSecret:
|
||||
description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the service monitor and accessible by the Prometheus Operator.
|
||||
description: Secret to mount to read bearer token for scraping
|
||||
targets. The secret needs to be in the same namespace as the
|
||||
service monitor and accessible by the Prometheus Operator.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
honorLabels:
|
||||
description: HonorLabels chooses the metric's labels on collisions with target labels.
|
||||
description: HonorLabels chooses the metric's labels on collisions
|
||||
with target labels.
|
||||
type: boolean
|
||||
honorTimestamps:
|
||||
description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
|
||||
description: HonorTimestamps controls whether Prometheus respects
|
||||
the timestamps present in scraped data.
|
||||
type: boolean
|
||||
interval:
|
||||
description: Interval at which metrics should be scraped
|
||||
type: string
|
||||
metricRelabelings:
|
||||
description: MetricRelabelConfigs to apply to samples before ingestion.
|
||||
description: MetricRelabelConfigs to apply to samples before
|
||||
ingestion.
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -138,39 +175,56 @@ spec:
|
||||
description: HTTP path to scrape for metrics.
|
||||
type: string
|
||||
port:
|
||||
description: Name of the service port this endpoint refers to. Mutually exclusive with targetPort.
|
||||
description: Name of the service port this endpoint refers to.
|
||||
Mutually exclusive with targetPort.
|
||||
type: string
|
||||
proxyUrl:
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
|
||||
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
|
||||
to proxy through this endpoint.
|
||||
type: string
|
||||
relabelings:
|
||||
description: 'RelabelConfigs to apply to samples before scraping. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
description: 'RelabelConfigs to apply to samples before scraping.
|
||||
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
|
||||
items:
|
||||
description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
description: 'RelabelConfig allows dynamic rewriting of the
|
||||
label set, being applied to samples before ingestion. It
|
||||
defines `<metric_relabel_configs>`-section of Prometheus
|
||||
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
|
||||
properties:
|
||||
action:
|
||||
description: Action to perform based on regex matching. Default is 'replace'
|
||||
description: Action to perform based on regex matching.
|
||||
Default is 'replace'
|
||||
type: string
|
||||
modulus:
|
||||
description: Modulus to take of the hash of the source label values.
|
||||
description: Modulus to take of the hash of the source
|
||||
label values.
|
||||
format: int64
|
||||
type: integer
|
||||
regex:
|
||||
description: Regular expression against which the extracted value is matched. Default is '(.*)'
|
||||
description: Regular expression against which the extracted
|
||||
value is matched. Default is '(.*)'
|
||||
type: string
|
||||
replacement:
|
||||
description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
|
||||
description: Replacement value against which a regex replace
|
||||
is performed if the regular expression matches. Regex
|
||||
capture groups are available. Default is '$1'
|
||||
type: string
|
||||
separator:
|
||||
description: Separator placed between concatenated source label values. default is ';'.
|
||||
description: Separator placed between concatenated source
|
||||
label values. default is ';'.
|
||||
type: string
|
||||
sourceLabels:
|
||||
description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
|
||||
description: The source labels select values from existing
|
||||
labels. Their content is concatenated using the configured
|
||||
separator and matched against the configured regular
|
||||
expression for the replace, keep, and drop actions.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
targetLabel:
|
||||
description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
|
||||
description: Label to which the resulting value is written
|
||||
in a replace action. It is mandatory for replace actions.
|
||||
Regex capture groups are available.
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
@ -184,25 +238,31 @@ spec:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: Name or number of the pod port this endpoint refers to. Mutually exclusive with port.
|
||||
description: Name or number of the pod port this endpoint refers
|
||||
to. Mutually exclusive with port.
|
||||
x-kubernetes-int-or-string: true
|
||||
tlsConfig:
|
||||
description: TLS configuration to use when scraping the endpoint
|
||||
properties:
|
||||
ca:
|
||||
description: Stuct containing the CA cert to use for the targets.
|
||||
description: Stuct containing the CA cert to use for the
|
||||
targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
description: ConfigMap containing data to use for the
|
||||
targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
description: Specify whether the ConfigMap or its
|
||||
key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -211,35 +271,45 @@ spec:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
description: Specify whether the Secret or its key
|
||||
must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
caFile:
|
||||
description: Path to the CA cert in the Prometheus container to use for the targets.
|
||||
description: Path to the CA cert in the Prometheus container
|
||||
to use for the targets.
|
||||
type: string
|
||||
cert:
|
||||
description: Struct containing the client cert file for the targets.
|
||||
description: Struct containing the client cert file for
|
||||
the targets.
|
||||
properties:
|
||||
configMap:
|
||||
description: ConfigMap containing data to use for the targets.
|
||||
description: ConfigMap containing data to use for the
|
||||
targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key to select.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the ConfigMap or its key must be defined
|
||||
description: Specify whether the ConfigMap or its
|
||||
key must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -248,38 +318,48 @@ spec:
|
||||
description: Secret containing data to use for the targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind,
|
||||
uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
description: Specify whether the Secret or its key
|
||||
must be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
type: object
|
||||
type: object
|
||||
certFile:
|
||||
description: Path to the client cert file in the Prometheus container for the targets.
|
||||
description: Path to the client cert file in the Prometheus
|
||||
container for the targets.
|
||||
type: string
|
||||
insecureSkipVerify:
|
||||
description: Disable target certificate validation.
|
||||
type: boolean
|
||||
keyFile:
|
||||
description: Path to the client key file in the Prometheus container for the targets.
|
||||
description: Path to the client key file in the Prometheus
|
||||
container for the targets.
|
||||
type: string
|
||||
keySecret:
|
||||
description: Secret containing the client key file for the targets.
|
||||
description: Secret containing the client key file for the
|
||||
targets.
|
||||
properties:
|
||||
key:
|
||||
description: The key of the secret to select from. Must be a valid secret key.
|
||||
description: The key of the secret to select from. Must
|
||||
be a valid secret key.
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
|
||||
TODO: Add other useful fields. apiVersion, kind, uid?'
|
||||
type: string
|
||||
optional:
|
||||
description: Specify whether the Secret or its key must be defined
|
||||
description: Specify whether the Secret or its key must
|
||||
be defined
|
||||
type: boolean
|
||||
required:
|
||||
- key
|
||||
@ -294,10 +374,12 @@ spec:
|
||||
description: The label to use to retrieve the job name from.
|
||||
type: string
|
||||
namespaceSelector:
|
||||
description: Selector to select which namespaces the Endpoints objects are discovered from.
|
||||
description: Selector to select which namespaces the Endpoints objects
|
||||
are discovered from.
|
||||
properties:
|
||||
any:
|
||||
description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
|
||||
description: Boolean describing whether all namespaces are selected
|
||||
in contrast to a list restricting them.
|
||||
type: boolean
|
||||
matchNames:
|
||||
description: List of namespace names.
|
||||
@ -306,30 +388,42 @@ spec:
|
||||
type: array
|
||||
type: object
|
||||
podTargetLabels:
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
|
||||
description: PodTargetLabels transfers labels on the Kubernetes Pod
|
||||
onto the target.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
sampleLimit:
|
||||
description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
|
||||
description: SampleLimit defines per-scrape limit on number of scraped
|
||||
samples that will be accepted.
|
||||
format: int64
|
||||
type: integer
|
||||
selector:
|
||||
description: Selector to select Endpoints objects.
|
||||
properties:
|
||||
matchExpressions:
|
||||
description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
|
||||
description: matchExpressions is a list of label selector requirements.
|
||||
The requirements are ANDed.
|
||||
items:
|
||||
description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
|
||||
description: A label selector requirement is a selector that
|
||||
contains values, a key, and an operator that relates the key
|
||||
and values.
|
||||
properties:
|
||||
key:
|
||||
description: key is the label key that the selector applies to.
|
||||
description: key is the label key that the selector applies
|
||||
to.
|
||||
type: string
|
||||
operator:
|
||||
description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
|
||||
description: operator represents a key's relationship to
|
||||
a set of values. Valid operators are In, NotIn, Exists
|
||||
and DoesNotExist.
|
||||
type: string
|
||||
values:
|
||||
description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
|
||||
description: values is an array of string values. If the
|
||||
operator is In or NotIn, the values array must be non-empty.
|
||||
If the operator is Exists or DoesNotExist, the values
|
||||
array must be empty. This array is replaced during a strategic
|
||||
merge patch.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
@ -341,11 +435,16 @@ spec:
|
||||
matchLabels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
|
||||
description: matchLabels is a map of {key,value} pairs. A single
|
||||
{key,value} in the matchLabels map is equivalent to an element
|
||||
of matchExpressions, whose key field is "key", the operator
|
||||
is "In", and the values array contains only "value". The requirements
|
||||
are ANDed.
|
||||
type: object
|
||||
type: object
|
||||
targetLabels:
|
||||
description: TargetLabels transfers labels on the Kubernetes Service onto the target.
|
||||
description: TargetLabels transfers labels on the Kubernetes Service
|
||||
onto the target.
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -92,29 +92,42 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
// Creates ingress objects
|
||||
newIngress(name, namespace, host, path, serviceName, servicePort):: (
|
||||
local ingress = k.extensions.v1beta1.ingress;
|
||||
local ingressTls = ingress.mixin.spec.tlsType;
|
||||
local ingressRule = ingress.mixin.spec.rulesType;
|
||||
local httpIngressPath = ingressRule.mixin.http.pathsType;
|
||||
|
||||
ingress.new()
|
||||
+ ingress.mixin.metadata.withName(name)
|
||||
+ ingress.mixin.metadata.withNamespace(namespace)
|
||||
+ ingress.mixin.spec.withRules(
|
||||
ingressRule.new()
|
||||
+ ingressRule.withHost(host)
|
||||
+ ingressRule.mixin.http.withPaths(
|
||||
httpIngressPath.new()
|
||||
+ httpIngressPath.withPath(path)
|
||||
+ httpIngressPath.mixin.backend.withServiceName(serviceName)
|
||||
+ httpIngressPath.mixin.backend.withServicePort(servicePort)
|
||||
),
|
||||
)
|
||||
{
|
||||
apiVersion: 'networking.k8s.io/v1',
|
||||
kind: 'Ingress',
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
rules: [
|
||||
{
|
||||
host: host,
|
||||
http: {
|
||||
paths: [
|
||||
{
|
||||
backend: {
|
||||
service: {
|
||||
name: serviceName,
|
||||
port: {
|
||||
name: servicePort,
|
||||
},
|
||||
},
|
||||
},
|
||||
path: path,
|
||||
pathType: 'Prefix',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
),
|
||||
|
||||
// Add TLS to Ingress resource with secret containing the certificates if exists
|
||||
addIngressTLS(I, S=''):: (
|
||||
local ingress = k.extensions.v1beta1.ingress;
|
||||
local ingress = k.networking.v1beta1.ingress;
|
||||
local ingressTls = ingress.mixin.spec.tlsType;
|
||||
local host = I.spec.rules[0].host;
|
||||
local namespace = I.metadata.namespace;
|
||||
|
Loading…
Reference in New Issue
Block a user