diff --git a/base_operator_stack.jsonnet b/base_operator_stack.jsonnet
index 03328e9..0184939 100644
--- a/base_operator_stack.jsonnet
+++ b/base_operator_stack.jsonnet
@@ -1,4 +1,4 @@
-local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
+local k = import 'ksonnet-lib/ksonnet.beta.4/k.libsonnet';
local utils = import 'utils.libsonnet';
local vars = import 'vars.jsonnet';
@@ -17,7 +17,7 @@ local vars = import 'vars.jsonnet';
prometheus+:: {
names: 'k8s',
replicas: 1,
- namespaces: ['default', 'kube-system', 'monitoring'],
+ namespaces: ['default', 'kube-system', vars._config.namespace],
},
alertmanager+:: {
@@ -60,7 +60,7 @@ local vars = import 'vars.jsonnet';
},
},
plugins: vars.grafana.plugins,
- env: vars.grafana.env
+ env: vars.grafana.env,
},
},
//---------------------------------------
diff --git a/jsonnetfile.json b/jsonnetfile.json
index 420c9b3..7d75ad2 100644
--- a/jsonnetfile.json
+++ b/jsonnetfile.json
@@ -4,10 +4,18 @@
{
"source": {
"git": {
- "remote": "https://github.com/coreos/kube-prometheus.git",
+ "remote": "https://github.com/prometheus-operator/kube-prometheus.git",
"subdir": "jsonnet/kube-prometheus"
}
},
+ "version": "main"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/ksonnet/ksonnet-lib.git"
+ }
+ },
"version": "master"
}
],
diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json
index 49ad402..06cee35 100644
--- a/jsonnetfile.lock.json
+++ b/jsonnetfile.lock.json
@@ -8,38 +8,28 @@
"subdir": "grafana"
}
},
- "version": "57b4365eacda291b82e0d55ba7eec573a8198dda",
- "sum": "92DWADwGjnCfpZaL7Q07C0GZayxBziGla/O03qWea34="
+ "version": "d039275e4916aceae1c137120882e01d857787ac",
+ "sum": "515vMn4x4tP8vegL4HLW0nDO5+njGTgnDZB5OOhtsCI="
},
{
"source": {
"git": {
- "remote": "https://github.com/coreos/etcd.git",
- "subdir": "Documentation/etcd-mixin"
+ "remote": "https://github.com/etcd-io/etcd.git",
+ "subdir": "contrib/mixin"
}
},
- "version": "d8c8f903eee10b8391abaef7758c38b2cd393c55",
- "sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
+ "version": "e73f55d4e94666c99558baa2fd4e365aeaca4dc4",
+ "sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds="
},
{
"source": {
"git": {
- "remote": "https://github.com/coreos/kube-prometheus.git",
- "subdir": "jsonnet/kube-prometheus"
+ "remote": "https://github.com/grafana/grafana.git",
+ "subdir": "grafana-mixin"
}
},
- "version": "17989b42aa10b1c6afa07043cb05bcd5ae492284",
- "sum": "2FR289B1LGUf5tTN4PXBj5TjRX7okSFxE8uHkSslzDQ="
- },
- {
- "source": {
- "git": {
- "remote": "https://github.com/coreos/prometheus-operator.git",
- "subdir": "jsonnet/prometheus-operator"
- }
- },
- "version": "e31c69f9b5c6555e0f4a5c1f39d0f03182dd6b41",
- "sum": "WggWVWZ+CBEUThQCztSaRELbtqdXf9s3OFzf06HbYNA="
+ "version": "1120f9e255760a3c104b57871fcb91801e934382",
+ "sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
@@ -48,8 +38,18 @@
"subdir": "grafonnet"
}
},
- "version": "8fb95bd89990e493a8534205ee636bfcb8db67bd",
- "sum": "tDuuSKE9f4Ew2bjBM33Rs6behLEAzkmKkShSt+jpAak="
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet-7.0"
+ }
+ },
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
@@ -58,8 +58,8 @@
"subdir": "grafana-builder"
}
},
- "version": "881db2241f0c5007c3e831caf34b0c645202b4ab",
- "sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
+ "version": "d68f9a6e0b1af7c4c4056dc2b43fb8f3bac01f43",
+ "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
},
{
"source": {
@@ -69,8 +69,7 @@
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
- "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
- "name": "ksonnet"
+ "sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
},
{
"source": {
@@ -79,18 +78,8 @@
"subdir": ""
}
},
- "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
- "sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
- },
- {
- "source": {
- "git": {
- "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
- "subdir": "lib/promgrafonnet"
- }
- },
- "version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
- "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
+ "version": "3c386687c1f8ceb6b79ff887c4a934e9cee1b90a",
+ "sum": "H8lcnk7gQEUoRi58/xq+JTfd2PcjJUjMQHgxGklUiFY="
},
{
"source": {
@@ -99,8 +88,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
- "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
- "sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
+ "version": "71200632a6c97e9b87166dbb27489798a05effe3",
+ "sum": "4PJ2ROxODsoYO/1Y70+dgLZVjW5zlfzB+TDpxJBHwaI="
},
{
"source": {
@@ -109,8 +98,50 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
- "version": "d667979ed55ad1c4db44d331b51d646f5b903aa7",
- "sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8="
+ "version": "71200632a6c97e9b87166dbb27489798a05effe3",
+ "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus-operator/kube-prometheus.git",
+ "subdir": "jsonnet/kube-prometheus"
+ }
+ },
+ "version": "37d00082289c587f5a02a343ba23cfbe167000e2",
+ "sum": "5onAaPSrjnmgXIAsypnx0W/sIA7iTsHCeCjPrhGxj5A="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus-operator/prometheus-operator.git",
+ "subdir": "jsonnet/mixin"
+ }
+ },
+ "version": "df4cbd9526d8ff8e404a903b7ed2532847551d19",
+ "sum": "GQmaVFJwKMiD/P4n3N2LrAZVcwutriWrP8joclDtBYQ=",
+ "name": "prometheus-operator-mixin"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus-operator/prometheus-operator.git",
+ "subdir": "jsonnet/prometheus-operator"
+ }
+ },
+ "version": "df4cbd9526d8ff8e404a903b7ed2532847551d19",
+ "sum": "wJ1E8XxYJ0RJrUuDNWLzE7bzo6JrH7P9q1lAu/xi4Ow="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus/alertmanager.git",
+ "subdir": "doc/alertmanager-mixin"
+ }
+ },
+ "version": "8afd462a9eaa3979bddf7bd6278bede4bc1f30e2",
+ "sum": "PsK+V7oETCPKu2gLoPfqY0wwPKH9TzhNj6o2xezjjXc=",
+ "name": "alertmanager"
},
{
"source": {
@@ -119,8 +150,8 @@
"subdir": "docs/node-mixin"
}
},
- "version": "08ce3c6dd430deb51798826701a395e460620d60",
- "sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
+ "version": "a3bd2e13052929663dbd7d680fab4a952efb1de6",
+ "sum": "TwdaTm0Z++diiLyaKAAimmC6hBL7XbrJc0RHhBCpAdU="
},
{
"source": {
@@ -129,9 +160,30 @@
"subdir": "documentation/prometheus-mixin"
}
},
- "version": "74207c04655e1fd93eea0e9a5d2f31b1cbc4d3d0",
- "sum": "lEzhZ8gllSfAO4kmXeTwl4W0anapIeFd5GCaCNuDe18=",
+ "version": "84c6f0e58444a452a5e2e19d14221409d2b9d790",
+ "sum": "LRx0tbMnoE1p8KEn+i81j2YsA5Sgt3itE5Y6jBf5eOQ=",
"name": "prometheus"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/pyrra-dev/pyrra.git",
+ "subdir": "config/crd/bases"
+ }
+ },
+ "version": "2584cefb8e6859eb9ee103df199e232cd0066aab",
+ "sum": "d1550yhsX4VxdVN7b0gWT0cido/W90P6OGLzLqPwZcs="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/thanos-io/thanos.git",
+ "subdir": "mixin"
+ }
+ },
+ "version": "3327c510076a77f876ac26e699d5252a61fc529a",
+ "sum": "Io++1+lp1oQVoQiVRSCXUiGdTIRPV7aL6Ewgs3bShEs=",
+ "name": "thanos-mixin"
}
],
"legacyImports": false
diff --git a/main.jsonnet b/main.jsonnet
index 681366a..10cef07 100644
--- a/main.jsonnet
+++ b/main.jsonnet
@@ -1,10 +1,10 @@
local utils = import 'utils.libsonnet';
local vars = import 'vars.jsonnet';
-local kp = (import 'kube-prometheus/kube-prometheus.libsonnet')
- + (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet')
- + (import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')
- + (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')
+local kp = (import 'kube-prometheus/main.libsonnet') + { values+:: { common+: { namespace: vars._config.namespace } } }
+ + (import 'kube-prometheus/addons/anti-affinity.libsonnet')
+ + (import 'kube-prometheus/platforms/kops-coredns.libsonnet')
+ + (import 'kube-prometheus/platforms/kubeadm.libsonnet')
// Additional modules are loaded dynamically from vars.jsonnet
+ utils.join_objects([module.file for module in vars.modules if module.enabled])
// Load K3s customized modules
@@ -14,6 +14,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet')
// Load image versions last to override default from modules
+ (import 'image_sources_versions.jsonnet');
+
// Generate core modules
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) }
// First generate operator resources except the serviceMonitors
diff --git a/manifests/alertmanager-alertmanager.yaml b/manifests/alertmanager-alertmanager.yaml
index cedc323..8086809 100644
--- a/manifests/alertmanager-alertmanager.yaml
+++ b/manifests/alertmanager-alertmanager.yaml
@@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
labels:
- alertmanager: main
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: main
namespace: monitoring
spec:
@@ -11,22 +15,36 @@ spec:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
- matchExpressions:
- - key: alertmanager
- operator: In
- values:
- - main
+ matchLabels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
namespaces:
- monitoring
topologyKey: kubernetes.io/hostname
weight: 100
- image: prom/alertmanager:v0.21.0
+ image: quay.io/prometheus/alertmanager:v0.25.0
nodeSelector:
kubernetes.io/os: linux
- replicas: 1
+ podMetadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ replicas: 3
+ resources:
+ limits:
+ cpu: 100m
+ memory: 100Mi
+ requests:
+ cpu: 4m
+ memory: 100Mi
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: alertmanager-main
- version: v0.21.0
+ version: 0.25.0
diff --git a/manifests/alertmanager-networkPolicy.yaml b/manifests/alertmanager-networkPolicy.yaml
new file mode 100644
index 0000000..d84f477
--- /dev/null
+++ b/manifests/alertmanager-networkPolicy.yaml
@@ -0,0 +1,42 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ name: alertmanager-main
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 9093
+ protocol: TCP
+ - port: 8080
+ protocol: TCP
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: alertmanager
+ ports:
+ - port: 9094
+ protocol: TCP
+ - port: 9094
+ protocol: UDP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/alertmanager-podDisruptionBudget.yaml b/manifests/alertmanager-podDisruptionBudget.yaml
new file mode 100644
index 0000000..85cae78
--- /dev/null
+++ b/manifests/alertmanager-podDisruptionBudget.yaml
@@ -0,0 +1,19 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ name: alertmanager-main
+ namespace: monitoring
+spec:
+ maxUnavailable: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/alertmanager-prometheusRule.yaml b/manifests/alertmanager-prometheusRule.yaml
new file mode 100644
index 0000000..534bca2
--- /dev/null
+++ b/manifests/alertmanager-prometheusRule.yaml
@@ -0,0 +1,139 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ prometheus: k8s
+ role: alert-rules
+ name: alertmanager-main-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: alertmanager.rules
+ rules:
+ - alert: AlertmanagerFailedReload
+ annotations:
+ description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload
+ summary: Reloading an Alertmanager configuration has failed.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+ - alert: AlertmanagerMembersInconsistent
+ annotations:
+ description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent
+ summary: A member of an Alertmanager cluster has not found all other cluster members.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m])
+ < on (namespace,service) group_left
+ count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}[5m]))
+ for: 15m
+ labels:
+ severity: critical
+ - alert: AlertmanagerFailedToSendAlerts
+ annotations:
+ description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts
+ summary: An Alertmanager instance failed to send notifications.
+ expr: |
+ (
+ rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
+ /
+ rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: warning
+ - alert: AlertmanagerClusterFailedToSendAlerts
+ annotations:
+ description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
+ expr: |
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: critical
+ - alert: AlertmanagerClusterFailedToSendAlerts
+ annotations:
+ description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
+ summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
+ expr: |
+ min by (namespace,service, integration) (
+ rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
+ /
+ rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
+ )
+ > 0.01
+ for: 5m
+ labels:
+ severity: warning
+ - alert: AlertmanagerConfigInconsistent
+ annotations:
+ description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent
+ summary: Alertmanager instances within the same cluster have different configurations.
+ expr: |
+ count by (namespace,service) (
+ count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})
+ )
+ != 1
+ for: 20m
+ labels:
+ severity: critical
+ - alert: AlertmanagerClusterDown
+ annotations:
+ description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown
+ summary: Half or more of the Alertmanager instances within the same cluster are down.
+ expr: |
+ (
+ count by (namespace,service) (
+ avg_over_time(up{job="alertmanager-main",namespace="monitoring"}[5m]) < 0.5
+ )
+ /
+ count by (namespace,service) (
+ up{job="alertmanager-main",namespace="monitoring"}
+ )
+ )
+ >= 0.5
+ for: 5m
+ labels:
+ severity: critical
+ - alert: AlertmanagerClusterCrashlooping
+ annotations:
+ description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping
+ summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
+ expr: |
+ (
+ count by (namespace,service) (
+ changes(process_start_time_seconds{job="alertmanager-main",namespace="monitoring"}[10m]) > 4
+ )
+ /
+ count by (namespace,service) (
+ up{job="alertmanager-main",namespace="monitoring"}
+ )
+ )
+ >= 0.5
+ for: 5m
+ labels:
+ severity: critical
diff --git a/manifests/alertmanager-secret.yaml b/manifests/alertmanager-secret.yaml
index e019922..54dfb43 100644
--- a/manifests/alertmanager-secret.yaml
+++ b/manifests/alertmanager-secret.yaml
@@ -1,7 +1,12 @@
apiVersion: v1
-data: {}
kind: Secret
metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: alertmanager-main
namespace: monitoring
stringData:
@@ -12,21 +17,28 @@ stringData:
- "equal":
- "namespace"
- "alertname"
- "source_match":
- "severity": "critical"
- "target_match_re":
- "severity": "warning|info"
+ "source_matchers":
+ - "severity = critical"
+ "target_matchers":
+ - "severity =~ warning|info"
- "equal":
- "namespace"
- "alertname"
- "source_match":
- "severity": "warning"
- "target_match_re":
- "severity": "info"
+ "source_matchers":
+ - "severity = warning"
+ "target_matchers":
+ - "severity = info"
+ - "equal":
+ - "namespace"
+ "source_matchers":
+ - "alertname = InfoInhibitor"
+ "target_matchers":
+ - "severity = info"
"receivers":
- "name": "Default"
- "name": "Watchdog"
- "name": "Critical"
+ - "name": "null"
"route":
"group_by":
- "namespace"
@@ -35,10 +47,13 @@ stringData:
"receiver": "Default"
"repeat_interval": "12h"
"routes":
- - "match":
- "alertname": "Watchdog"
+ - "matchers":
+ - "alertname = Watchdog"
"receiver": "Watchdog"
- - "match":
- "severity": "critical"
+ - "matchers":
+ - "alertname = InfoInhibitor"
+ "receiver": "null"
+ - "matchers":
+ - "severity = critical"
"receiver": "Critical"
type: Opaque
diff --git a/manifests/alertmanager-service.yaml b/manifests/alertmanager-service.yaml
index df4c9ff..33c960d 100644
--- a/manifests/alertmanager-service.yaml
+++ b/manifests/alertmanager-service.yaml
@@ -2,7 +2,11 @@ apiVersion: v1
kind: Service
metadata:
labels:
- alertmanager: main
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: alertmanager-main
namespace: monitoring
spec:
@@ -10,7 +14,12 @@ spec:
- name: web
port: 9093
targetPort: web
+ - name: reloader-web
+ port: 8080
+ targetPort: reloader-web
selector:
- alertmanager: main
- app: alertmanager
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
sessionAffinity: ClientIP
diff --git a/manifests/alertmanager-serviceAccount.yaml b/manifests/alertmanager-serviceAccount.yaml
index 5c06d5e..dc2eb85 100644
--- a/manifests/alertmanager-serviceAccount.yaml
+++ b/manifests/alertmanager-serviceAccount.yaml
@@ -1,5 +1,12 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
name: alertmanager-main
namespace: monitoring
diff --git a/manifests/alertmanager-serviceMonitor.yaml b/manifests/alertmanager-serviceMonitor.yaml
index 548af0d..492a9f0 100644
--- a/manifests/alertmanager-serviceMonitor.yaml
+++ b/manifests/alertmanager-serviceMonitor.yaml
@@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
- k8s-app: alertmanager
- name: alertmanager
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.25.0
+ name: alertmanager-main
namespace: monitoring
spec:
endpoints:
- interval: 30s
port: web
+ - interval: 30s
+ port: reloader-web
selector:
matchLabels:
- alertmanager: main
+ app.kubernetes.io/component: alert-router
+ app.kubernetes.io/instance: main
+ app.kubernetes.io/name: alertmanager
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/grafana-config.yaml b/manifests/grafana-config.yaml
index 750e3c8..10d9c6a 100644
--- a/manifests/grafana-config.yaml
+++ b/manifests/grafana-config.yaml
@@ -1,8 +1,15 @@
apiVersion: v1
-data:
- grafana.ini: W2F1dGguYW5vbnltb3VzXQplbmFibGVkID0gZmFsc2UKW2F1dGguYmFzaWNdCmVuYWJsZWQgPSBmYWxzZQpbc2Vzc2lvbl0KcHJvdmlkZXIgPSBtZW1vcnkKW3NtdHBdCmVuYWJsZWQgPSB0cnVlCmZyb21fYWRkcmVzcyA9IG15ZW1haWxAZ21haWwuY29tCmZyb21fbmFtZSA9IEdyYWZhbmEgQWxlcnQKaG9zdCA9IHNtdHAtc2VydmVyLm1vbml0b3Jpbmcuc3ZjOjI1CnBhc3N3b3JkID0gCnNraXBfdmVyaWZ5ID0gdHJ1ZQp1c2VyID0gCg==
kind: Secret
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-config
namespace: monitoring
+stringData:
+ grafana.ini: |
+ [date_formats]
+ default_timezone = UTC
type: Opaque
diff --git a/manifests/grafana-dashboardDatasources.yaml b/manifests/grafana-dashboardDatasources.yaml
index 22d4748..f4c4cde 100644
--- a/manifests/grafana-dashboardDatasources.yaml
+++ b/manifests/grafana-dashboardDatasources.yaml
@@ -1,8 +1,27 @@
apiVersion: v1
-data:
- datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
kind: Secret
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-datasources
namespace: monitoring
+stringData:
+ datasources.yaml: |-
+ {
+ "apiVersion": 1,
+ "datasources": [
+ {
+ "access": "proxy",
+ "editable": false,
+ "name": "prometheus",
+ "orgId": 1,
+ "type": "prometheus",
+ "url": "http://prometheus-k8s.monitoring.svc:9090",
+ "version": 1
+ }
+ ]
+ }
type: Opaque
diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml
index e7a908e..9ec56fe 100644
--- a/manifests/grafana-dashboardDefinitions.yaml
+++ b/manifests/grafana-dashboardDefinitions.yaml
@@ -1,5 +1,608 @@
apiVersion: v1
items:
+- apiVersion: v1
+ data:
+ alertmanager-overview.json: |-
+ {
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "current set of alerts stored in the Alertmanager",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Alerts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "rate of successful and invalid alerts received by the Alertmanager",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Received",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Invalid",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Alerts receive rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Alerts",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "rate of successful and invalid notifications sent by the Alertmanager",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "integration",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Total",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Failed",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$integration: Notifications Send Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "latency of notifications sent by the Alertmanager",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "integration",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} 99th Percentile",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n) \n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Median",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} Average",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$integration: Notification Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Notifications",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "alertmanager-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "namespace",
+ "multi": false,
+ "name": "namespace",
+ "options": [
+
+ ],
+ "query": "label_values(alertmanager_alerts, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "service",
+ "multi": false,
+ "name": "service",
+ "options": [
+
+ ],
+ "query": "label_values(alertmanager_alerts, service)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "all",
+ "value": "$__all"
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "integration",
+ "options": [
+
+ ],
+ "query": "label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Alertmanager / Overview",
+ "uid": "alertmanager-overview",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-alertmanager-overview
+ namespace: monitoring
- apiVersion: v1
data:
apiserver.json: |-
@@ -71,7 +674,11 @@ items:
},
"id": 3,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -110,7 +717,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "apiserver_request:availability30d{verb=\"all\"}",
+ "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -142,19 +749,21 @@ items:
"dashes": false,
"datasource": "$datasource",
"decimals": 3,
- "description": "How much error budget is left looking at our 0.990% availability gurantees?",
+ "description": "How much error budget is left looking at our 0.990% availability guarantees?",
"fill": 10,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -180,7 +789,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "100 * (apiserver_request:availability30d{verb=\"all\"} - 0.990000)",
+ "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "errorbudget",
@@ -266,7 +875,11 @@ items:
},
"id": 5,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -305,7 +918,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "apiserver_request:availability30d{verb=\"read\"}",
+ "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -338,17 +951,19 @@ items:
"datasource": "$datasource",
"description": "How many read requests (LIST,GET) per second do the apiservers get by code?",
"fill": 10,
+ "fillGradient": 0,
"gridPos": {
},
"id": 6,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -389,7 +1004,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})",
+ "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ code }}",
@@ -446,17 +1061,19 @@ items:
"datasource": "$datasource",
"description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 7,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -482,7 +1099,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\"})",
+ "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -539,17 +1156,19 @@ items:
"datasource": "$datasource",
"description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 8,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -575,7 +1194,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"read\"}",
+ "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -659,7 +1278,11 @@ items:
},
"id": 9,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -698,7 +1321,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "apiserver_request:availability30d{verb=\"write\"}",
+ "expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -731,17 +1354,19 @@ items:
"datasource": "$datasource",
"description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?",
"fill": 10,
+ "fillGradient": 0,
"gridPos": {
},
"id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -782,7 +1407,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})",
+ "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ code }}",
@@ -839,17 +1464,19 @@ items:
"datasource": "$datasource",
"description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 11,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -875,7 +1502,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\"})",
+ "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -932,17 +1559,19 @@ items:
"datasource": "$datasource",
"description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 12,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -968,7 +1597,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"write\"}",
+ "expr": "cluster_quantile:apiserver_request_slo_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ resource }}",
@@ -1037,17 +1666,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 13,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": false,
"sideWidth": null,
"total": false,
@@ -1068,12 +1699,12 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@@ -1129,17 +1760,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 14,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": false,
"sideWidth": null,
"total": false,
@@ -1160,12 +1793,12 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@@ -1221,10 +1854,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 15,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -1252,12 +1887,12 @@ items:
],
"spaceLength": 10,
- "span": 4,
+ "span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{name}}",
@@ -1326,320 +1961,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 16,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "ETCD Cache Entry Total",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 17,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} hit",
- "refId": "A"
- },
- {
- "expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} miss",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "ETCD Cache Hit/Miss Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 18,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} get",
- "refId": "A"
- },
- {
- "expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} miss",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "ETCD Cache Duration 99th Quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 19,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1721,17 +2055,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 20,
+ "id": 17,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1757,7 +2093,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -1813,17 +2149,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
- "id": 21,
+ "id": 18,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -1919,7 +2257,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -1932,20 +2270,19 @@ items:
{
"allValue": null,
"current": {
- "text": "prod",
- "value": "prod"
+
},
"datasource": "$datasource",
"hide": 2,
"includeAll": false,
- "label": null,
+ "label": "cluster",
"multi": false,
"name": "cluster",
"options": [
],
- "query": "label_values(apiserver_request_total, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"apiserver\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -1970,7 +2307,7 @@ items:
"options": [
],
- "query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
+ "query": "label_values(up{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -2020,6 +2357,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-apiserver
namespace: monitoring
- apiVersion: v1
@@ -2084,6 +2426,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2129,7 +2472,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2186,6 +2529,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2231,7 +2575,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2334,6 +2678,9 @@ items:
"id": 5,
"lines": true,
"linewidth": 1,
+ "links": [
+
+ ],
"minSpan": 24,
"nullPointMode": "null as zero",
"renderer": "flot",
@@ -2529,7 +2876,7 @@ items:
],
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2538,7 +2885,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2547,7 +2894,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2556,7 +2903,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2565,7 +2912,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2574,7 +2921,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2583,7 +2930,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2592,7 +2939,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -2626,6 +2973,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2671,7 +3019,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2728,6 +3076,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -2773,7 +3122,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2860,6 +3209,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -2903,7 +3253,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -2960,6 +3310,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3003,7 +3354,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3071,6 +3422,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3114,7 +3466,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3171,6 +3523,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3214,7 +3567,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3291,6 +3644,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3334,7 +3688,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3391,6 +3745,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3434,7 +3789,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\".+\"}[$interval:$resolution])) by (namespace))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{namespace}}",
@@ -3491,6 +3846,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3538,7 +3894,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs[$interval:$resolution])) by (instance))",
+ "expr": "sort_desc(sum(rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
@@ -3595,6 +3951,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
@@ -3642,7 +3999,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs[$interval:$resolution])) by (instance))",
+ "expr": "sort_desc(sum(rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$interval:$resolution]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$interval:$resolution])) by (instance))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
@@ -3797,7 +4154,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -3806,6 +4163,32 @@ items:
"refresh": 1,
"regex": "",
"type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},
@@ -3845,6 +4228,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-cluster-total
namespace: monitoring
- apiVersion: v1
@@ -3898,7 +4286,11 @@ items:
},
"id": 2,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -3937,7 +4329,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(up{job=\"kube-controller-manager\"})",
+ "expr": "sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -3969,10 +4361,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 3,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4005,10 +4399,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{cluster}} {{instance}} {{name}}",
"refId": "A"
}
],
@@ -4074,10 +4468,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4110,10 +4506,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)",
+ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{cluster}} {{instance}} {{name}}",
"refId": "A"
}
],
@@ -4179,10 +4575,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 5,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4215,10 +4613,10 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} {{name}}",
+ "legendFormat": "{{cluster}} {{instance}} {{name}}",
"refId": "A"
}
],
@@ -4284,17 +4682,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 6,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4320,28 +4720,28 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -4397,17 +4797,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 7,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4433,7 +4835,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -4502,10 +4904,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 8,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -4538,7 +4942,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -4607,17 +5011,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 9,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4643,7 +5049,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -4699,17 +5105,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4735,7 +5143,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -4791,17 +5199,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 11,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -4827,7 +5237,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -4897,7 +5307,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -4911,6 +5321,32 @@ items:
"allValue": null,
"current": {
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
},
"datasource": "$datasource",
"hide": 0,
@@ -4921,7 +5357,7 @@ items:
"options": [
],
- "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)",
+ "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -4971,11 +5407,16 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-controller-manager
namespace: monitoring
- apiVersion: v1
data:
- coredns-dashboard.json: |-
+ grafana-overview.json: |-
{
"annotations": {
"list": [
@@ -4986,131 +5427,248 @@ items:
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [
+
+ ],
+ "type": "dashboard"
+ },
"type": "dashboard"
}
]
},
- "description": "A dashboard for the CoreDNS DNS server.",
"editable": true,
- "gnetId": 5926,
+ "gnetId": null,
"graphTooltip": 0,
- "id": 14,
- "iteration": 1549319226130,
+ "id": 3085,
+ "iteration": 1631554945276,
"links": [
],
"panels": [
{
- "aliasColors": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ ],
+ "noValue": "0",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ ]
},
"gridPos": {
- "h": 7,
- "w": 8,
+ "h": 5,
+ "w": 6,
"x": 0,
"y": 0
},
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
+ "id": 6,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.3",
"targets": [
{
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (proto)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}",
- "refId": "A",
- "step": 60
- },
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "total",
- "refId": "B",
- "step": 60
+ "expr": "grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
}
- ],
- "thresholds": [
-
],
"timeFrom": null,
- "timeRegions": [
-
- ],
"timeShift": null,
- "title": "Requests (total)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "title": "Firing Alerts",
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
]
},
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "gridPos": {
+ "h": 5,
+ "w": 6,
+ "x": 6,
+ "y": 0
+ },
+ "id": 8,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
},
+ "text": {
+
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.3",
+ "targets": [
{
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "expr": "sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Dashboards",
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": null,
+ "displayMode": "auto"
+ },
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+
+ ]
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 10,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.3",
+ "targets": [
+ {
+ "expr": "grafana_build_info{job=~\"$job\", instance=~\"$instance\"}",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Build Info",
+ "transformations": [
+ {
+ "id": "labelsToFields",
+ "options": {
+
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "branch": true,
+ "container": true,
+ "goversion": true,
+ "namespace": true,
+ "pod": true,
+ "revision": true
+ },
+ "indexByName": {
+ "Time": 7,
+ "Value": 11,
+ "branch": 4,
+ "container": 8,
+ "edition": 2,
+ "goversion": 6,
+ "instance": 1,
+ "job": 0,
+ "namespace": 9,
+ "pod": 10,
+ "revision": 5,
+ "version": 3
+ },
+ "renameByName": {
+
+ }
+ }
+ }
+ ],
+ "type": "table"
},
{
"aliasColors": {
@@ -5119,126 +5677,26 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
- },
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 0
- },
- "id": 12,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
+ ]
},
- {
- "alias": "other",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_dns_request_type_count_total{instance=~\"$instance\"}[5m])) by (type)",
- "intervalFactor": 2,
- "legendFormat": "{{type}}",
- "refId": "A",
- "step": 60
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Requests (by qtype)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
+ "overrides": [
]
},
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
"fill": 1,
- "grid": {
-
- },
+ "fillGradient": 0,
"gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 0
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 5
},
+ "hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
@@ -5250,38 +5708,28 @@ items:
"values": false
},
"lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
"percentage": false,
- "pointradius": 5,
+ "pluginVersion": "8.1.3",
+ "pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- }
+
],
"spaceLength": 10,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (zone)",
- "intervalFactor": 2,
- "legendFormat": "{{zone}}",
- "refId": "A",
- "step": 60
- },
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))",
- "intervalFactor": 2,
- "legendFormat": "total",
- "refId": "B",
- "step": 60
+ "expr": "sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ",
+ "interval": "",
+ "legendFormat": "{{status_code}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -5292,11 +5740,11 @@ items:
],
"timeShift": null,
- "title": "Requests (by zone)",
+ "title": "RPS",
"tooltip": {
"shared": true,
"sort": 0,
- "value_type": "cumulative"
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -5310,478 +5758,22 @@ items:
},
"yaxes": [
{
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 7
- },
- "id": 10,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "total",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_dns_request_do_count_total{instance=~\"$instance\"}[5m]))",
- "intervalFactor": 2,
- "legendFormat": "DO",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))",
- "intervalFactor": 2,
- "legendFormat": "total",
- "refId": "B",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Requests (DO bit)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
+ "$$hashKey": "object:157",
+ "format": "reqps",
+ "label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 7
- },
- "id": 9,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "tcp:90",
- "yaxis": 2
- },
- {
- "alias": "tcp:99 ",
- "yaxis": 2
- },
- {
- "alias": "tcp:50",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99 ",
- "refId": "A",
- "step": 60
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90",
- "refId": "B",
- "step": 60
- },
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50",
- "refId": "C",
- "step": 60
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Requests (size, udp)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 7
- },
- "id": 14,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "tcp:90",
- "yaxis": 1
- },
- {
- "alias": "tcp:99 ",
- "yaxis": 1
- },
- {
- "alias": "tcp:50",
- "yaxis": 1
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99 ",
- "refId": "A",
- "step": 60
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90",
- "refId": "B",
- "step": 60
- },
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50",
- "refId": "C",
- "step": 60
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Requests (size,tcp)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 14
- },
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_dns_response_rcode_count_total{instance=~\"$instance\"}[5m])) by (rcode)",
- "intervalFactor": 2,
- "legendFormat": "{{rcode}}",
- "refId": "A",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Responses (by rcode)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
},
{
+ "$$hashKey": "object:158",
"format": "short",
+ "label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -5796,20 +5788,27 @@ items:
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+ ]
+ },
+ "overrides": [
+
+ ]
},
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
- "h": 7,
+ "h": 8,
"w": 12,
"x": 12,
- "y": 14
+ "y": 5
},
- "id": 3,
+ "hiddenSeries": false,
+ "id": 4,
"legend": {
"avg": false,
"current": false,
@@ -5820,13 +5819,14 @@ items:
"values": false
},
"lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
"percentage": false,
- "pointradius": 5,
+ "pluginVersion": "8.1.3",
+ "pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
@@ -5837,25 +5837,25 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le, job))",
- "intervalFactor": 2,
- "legendFormat": "99%",
- "refId": "A",
- "step": 40
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
+ "interval": "",
+ "legendFormat": "99th Percentile",
+ "refId": "A"
},
{
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))",
- "intervalFactor": 2,
- "legendFormat": "90%",
- "refId": "B",
- "step": 40
+ "exemplar": true,
+ "expr": "histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1",
+ "interval": "",
+ "legendFormat": "50th Percentile",
+ "refId": "B"
},
{
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))",
- "intervalFactor": 2,
- "legendFormat": "50%",
- "refId": "C",
- "step": 40
+ "exemplar": true,
+ "expr": "sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))",
+ "interval": "",
+ "legendFormat": "Average",
+ "refId": "C"
}
],
"thresholds": [
@@ -5866,11 +5866,11 @@ items:
],
"timeShift": null,
- "title": "Responses (duration)",
+ "title": "Request Latency",
"tooltip": {
"shared": true,
"sort": 0,
- "value_type": "cumulative"
+ "value_type": "individual"
},
"type": "graph",
"xaxis": {
@@ -5884,487 +5884,21 @@ items:
},
"yaxes": [
{
+ "$$hashKey": "object:210",
"format": "ms",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
+ "label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 21
- },
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "udp:50%",
- "yaxis": 1
- },
- {
- "alias": "tcp:50%",
- "yaxis": 2
- },
- {
- "alias": "tcp:90%",
- "yaxis": 2
- },
- {
- "alias": "tcp:99%",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99%",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90%",
- "refId": "B",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50%",
- "metric": "",
- "refId": "C",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Responses (size, udp)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
},
{
+ "$$hashKey": "object:211",
"format": "short",
+ "label": null,
"logBase": 1,
"max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 21
- },
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "udp:50%",
- "yaxis": 1
- },
- {
- "alias": "tcp:50%",
- "yaxis": 1
- },
- {
- "alias": "tcp:90%",
- "yaxis": 1
- },
- {
- "alias": "tcp:99%",
- "yaxis": 1
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:99%",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:90%",
- "refId": "B",
- "step": 40
- },
- {
- "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ",
- "intervalFactor": 2,
- "legendFormat": "{{proto}}:50%",
- "metric": "",
- "refId": "C",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Responses (size, tcp)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 28
- },
- "id": 15,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(coredns_cache_size{instance=~\"$instance\"}) by (type)",
- "intervalFactor": 2,
- "legendFormat": "{{type}}",
- "refId": "A",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Cache (size)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fill": 1,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 28
- },
- "id": 16,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "misses",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\"}[5m])) by (type)",
- "intervalFactor": 2,
- "legendFormat": "hits:{{type}}",
- "refId": "A",
- "step": 40
- },
- {
- "expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\"}[5m])) by (type)",
- "intervalFactor": 2,
- "legendFormat": "misses",
- "refId": "B",
- "step": 40
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Cache (hitrate)",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "pps",
- "logBase": 1,
- "max": null,
- "min": 0,
+ "min": null,
"show": true
}
],
@@ -6374,39 +5908,100 @@ items:
}
}
],
- "schemaVersion": 16,
+ "schemaVersion": 30,
"style": "dark",
"tags": [
- "dns",
- "coredns"
+
],
"templating": {
"list": [
+ {
+ "current": {
+ "selected": true,
+ "text": "dev-cortex",
+ "value": "dev-cortex"
+ },
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "queryValue": "",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {
+ "selected": false,
+ "text": [
+ "default/grafana"
+ ],
+ "value": [
+ "default/grafana"
+ ]
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(grafana_build_info, job)",
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "job",
+ "options": [
+
+ ],
+ "query": {
+ "query": "label_values(grafana_build_info, job)",
+ "refId": "Billing Admin-job-Variable-Query"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {
+ "selected": false,
"text": "All",
"value": "$__all"
},
- "datasource": "prometheus",
- "definition": "",
+ "datasource": "$datasource",
+ "definition": "label_values(grafana_build_info, instance)",
+ "description": null,
+ "error": null,
"hide": 0,
"includeAll": true,
- "label": "Instance",
- "multi": false,
+ "label": null,
+ "multi": true,
"name": "instance",
"options": [
],
- "query": "up{job=\"coredns\"}",
+ "query": {
+ "query": "label_values(grafana_build_info, instance)",
+ "refId": "Billing Admin-instance-Variable-Query"
+ },
"refresh": 1,
- "regex": ".*instance=\"(.*?)\".*",
+ "regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
- "tags": [
-
- ],
"tagsQuery": "",
"type": "query",
"useTags": false
@@ -6414,13 +6009,11 @@ items:
]
},
"time": {
- "from": "now-3h",
+ "from": "now-6h",
"to": "now"
},
"timepicker": {
- "now": true,
"refresh_intervals": [
- "5s",
"10s",
"30s",
"1m",
@@ -6430,26 +6023,21 @@ items:
"1h",
"2h",
"1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
]
},
- "timezone": "utc",
- "title": "CoreDNS",
- "version": 1
+ "timezone": "",
+ "title": "Grafana Overview",
+ "uid": "6be0s85Mk",
+ "version": 2
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-coredns-dashboard
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-grafana-overview
namespace: monitoring
- apiVersion: v1
data:
@@ -6486,10 +6074,12 @@ items:
"id": 1,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6513,7 +6103,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__interval]))",
+ "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6526,7 +6116,7 @@ items:
"title": "CPU Utilisation",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6569,11 +6159,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6597,7 +6190,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6610,7 +6203,7 @@ items:
"title": "CPU Requests Commitment",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6653,11 +6246,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6681,7 +6277,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_cpu_cores{cluster=\"$cluster\"})",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6694,7 +6290,7 @@ items:
"title": "CPU Limits Commitment",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6737,11 +6333,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6765,7 +6364,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})",
+ "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6778,7 +6377,7 @@ items:
"title": "Memory Utilisation",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6821,11 +6420,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 5,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6849,7 +6451,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6862,7 +6464,7 @@ items:
"title": "Memory Requests Commitment",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -6905,11 +6507,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -6933,7 +6538,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable_memory_bytes{cluster=\"$cluster\"})",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -6946,7 +6551,7 @@ items:
"title": "Memory Limits Commitment",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -7000,11 +6605,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 7,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7028,7 +6636,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -7044,7 +6652,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -7098,11 +6706,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7140,8 +6751,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #A",
"thresholds": [
@@ -7158,8 +6770,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to workloads",
- "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #B",
"thresholds": [
@@ -7176,6 +6789,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -7194,6 +6808,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -7212,6 +6827,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -7230,6 +6846,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -7248,6 +6865,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -7266,8 +6884,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -7293,7 +6912,7 @@ items:
],
"targets": [
{
- "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7302,7 +6921,7 @@ items:
"step": 10
},
{
- "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
+ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7311,7 +6930,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7320,7 +6939,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7329,7 +6948,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7338,7 +6957,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7347,7 +6966,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7364,7 +6983,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -7419,11 +7038,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 9,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7447,7 +7069,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -7463,7 +7085,7 @@ items:
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -7517,11 +7139,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7559,8 +7184,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #A",
"thresholds": [
@@ -7577,8 +7203,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to workloads",
- "linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
+ "linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #B",
"thresholds": [
@@ -7595,6 +7222,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -7613,6 +7241,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -7631,6 +7260,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -7649,6 +7279,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -7667,6 +7298,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -7685,8 +7317,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -7712,7 +7345,7 @@ items:
],
"targets": [
{
- "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7721,7 +7354,7 @@ items:
"step": 10
},
{
- "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
+ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7730,7 +7363,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7739,7 +7372,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7748,7 +7381,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7757,7 +7390,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7766,7 +7399,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -7783,7 +7416,7 @@ items:
"title": "Requests by Namespace",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -7840,10 +7473,12 @@ items:
"id": 11,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -7881,6 +7516,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -7899,6 +7535,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -7917,6 +7554,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -7935,6 +7573,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -7953,6 +7592,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -7971,6 +7611,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -7989,8 +7630,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -8016,7 +7658,7 @@ items:
],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8025,7 +7667,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8034,7 +7676,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8043,7 +7685,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8052,7 +7694,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8061,7 +7703,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -8078,7 +7720,7 @@ items:
"title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -8116,7 +7758,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -8133,11 +7775,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8156,12 +7801,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8177,7 +7822,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8208,19 +7853,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8231,11 +7864,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8254,12 +7890,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8275,7 +7911,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8312,7 +7948,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -8329,11 +7965,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 14,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8352,12 +7991,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8373,7 +8012,7 @@ items:
"title": "Average Container Bandwidth by Namespace: Received",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8404,19 +8043,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8427,11 +8054,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 15,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8450,12 +8080,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8471,7 +8101,7 @@ items:
"title": "Average Container Bandwidth by Namespace: Transmitted",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8508,7 +8138,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Average Container Bandwidth by Namespace",
"titleSize": "h6"
},
{
@@ -8525,11 +8155,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 16,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8548,12 +8181,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8569,7 +8202,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8584,7 +8217,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8600,19 +8233,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8623,11 +8244,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 17,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8646,12 +8270,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8667,7 +8291,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8682,7 +8306,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8704,7 +8328,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -8721,11 +8345,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 18,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8744,12 +8371,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8765,7 +8392,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8780,7 +8407,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -8796,19 +8423,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -8819,11 +8434,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 19,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -8842,12 +8460,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@@ -8863,7 +8481,198 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Rate of Packets Dropped",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": -1,
+ "fill": 10,
+ "id": 20,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{namespace}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS(Reads+Writes)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 21,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{namespace}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ThroughPut(Read+Write)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -8900,7 +8709,315 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Storage IO",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "id": 22,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "sort": {
+ "col": 4,
+ "desc": true
+ },
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "IOPS(Reads)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS(Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS(Reads + Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Throughput(Read)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Throughput(Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Throughput(Read + Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Namespace",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down to pods",
+ "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
+ "pattern": "namespace",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Storage IO",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "transform": "table",
+ "type": "table",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Storage IO - Distribution",
"titleSize": "h6"
}
],
@@ -8917,7 +9034,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -8942,7 +9059,7 @@ items:
"options": [
],
- "query": "label_values(node_cpu_seconds_total, cluster)",
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -8992,6 +9109,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-k8s-resources-cluster
namespace: monitoring
- apiVersion: v1
@@ -9027,11 +9149,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9055,7 +9180,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -9068,7 +9193,7 @@ items:
"title": "CPU Utilisation (from requests)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -9111,11 +9236,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9139,7 +9267,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -9152,7 +9280,7 @@ items:
"title": "CPU Utilisation (from limits)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -9195,11 +9323,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9223,7 +9354,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -9233,10 +9364,10 @@ items:
"thresholds": "70,80",
"timeFrom": null,
"timeShift": null,
- "title": "Memory Utilization (from requests)",
+ "title": "Memory Utilisation (from requests)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -9279,11 +9410,14 @@ items:
"fill": 1,
"format": "percentunit",
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9307,7 +9441,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -9320,7 +9454,7 @@ items:
"title": "Memory Utilisation (from limits)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "singlestat",
@@ -9374,11 +9508,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 5,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9399,8 +9536,9 @@ items:
"color": "#F2495C",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
},
@@ -9409,8 +9547,9 @@ items:
"color": "#FF9830",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
}
@@ -9421,7 +9560,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -9453,7 +9592,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -9507,11 +9646,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9549,6 +9691,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -9567,6 +9710,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -9585,6 +9729,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -9603,6 +9748,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -9621,6 +9767,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -9639,8 +9786,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -9666,7 +9814,7 @@ items:
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9675,7 +9823,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9684,7 +9832,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9693,7 +9841,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9702,7 +9850,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -9719,7 +9867,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -9774,11 +9922,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 7,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9799,8 +9950,9 @@ items:
"color": "#F2495C",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
},
@@ -9809,8 +9961,9 @@ items:
"color": "#FF9830",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
}
@@ -9821,7 +9974,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -9853,7 +10006,7 @@ items:
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -9907,11 +10060,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -9949,6 +10105,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -9967,6 +10124,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -9985,6 +10143,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -10003,6 +10162,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -10021,6 +10181,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -10039,6 +10200,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -10057,6 +10219,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -10075,6 +10238,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #H",
@@ -10093,8 +10257,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -10120,7 +10285,7 @@ items:
],
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10129,7 +10294,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10138,7 +10303,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10147,7 +10312,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10156,7 +10321,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10165,7 +10330,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10174,7 +10339,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10183,7 +10348,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10200,7 +10365,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -10257,10 +10422,12 @@ items:
"id": 9,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10298,6 +10465,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -10316,6 +10484,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -10334,6 +10503,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -10352,6 +10522,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -10370,6 +10541,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -10388,6 +10560,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -10406,8 +10579,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -10433,7 +10607,7 @@ items:
],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10442,7 +10616,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10451,7 +10625,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10460,7 +10634,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10469,7 +10643,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10478,7 +10652,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -10495,7 +10669,7 @@ items:
"title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -10533,7 +10707,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -10550,11 +10724,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10573,12 +10750,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10594,7 +10771,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10625,19 +10802,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -10648,11 +10813,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 11,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10671,12 +10839,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10692,7 +10860,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10729,7 +10897,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -10746,11 +10914,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10769,12 +10940,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10790,7 +10961,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10805,7 +10976,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -10821,19 +10992,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -10844,11 +11003,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10867,12 +11029,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10888,7 +11050,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -10903,7 +11065,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -10925,7 +11087,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -10942,11 +11104,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 14,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -10965,12 +11130,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -10986,7 +11151,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11001,7 +11166,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -11017,19 +11182,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -11040,11 +11193,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 15,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11063,12 +11219,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -11084,7 +11240,198 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Rate of Packets Dropped",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": -1,
+ "fill": 10,
+ "id": 16,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS(Reads+Writes)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 17,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ThroughPut(Read+Write)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11121,7 +11468,315 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Storage IO",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "id": 18,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "sort": {
+ "col": 4,
+ "desc": true
+ },
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "IOPS(Reads)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS(Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS(Reads + Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Throughput(Read)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Throughput(Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Throughput(Read + Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Pod",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": true,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down to pods",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "pattern": "pod",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Storage IO",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "transform": "table",
+ "type": "table",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Storage IO - Distribution",
"titleSize": "h6"
}
],
@@ -11138,7 +11793,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -11163,8 +11818,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -11190,8 +11845,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
+ "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -11240,6 +11895,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-k8s-resources-namespace
namespace: monitoring
- apiVersion: v1
@@ -11274,11 +11934,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11294,7 +11957,17 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
-
+ {
+ "alias": "max capacity",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
],
"spaceLength": 10,
"span": 12,
@@ -11302,7 +11975,15 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"cpu\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "max capacity",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -11318,7 +11999,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11372,11 +12053,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11414,6 +12098,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -11432,6 +12117,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -11450,6 +12136,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -11468,6 +12155,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -11486,6 +12174,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -11504,6 +12193,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "pod",
@@ -11531,7 +12221,7 @@ items:
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11540,7 +12230,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11549,7 +12239,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11558,7 +12248,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11567,7 +12257,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11584,7 +12274,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -11639,11 +12329,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11659,13 +12352,31 @@ items:
"points": false,
"renderer": "flot",
"seriesOverrides": [
-
+ {
+ "alias": "max capacity",
+ "color": "#F2495C",
+ "dashes": true,
+ "fill": 0,
+ "hiddenSeries": true,
+ "hideTooltip": true,
+ "legend": true,
+ "linewidth": 2,
+ "stack": false
+ }
],
"spaceLength": 10,
"span": 12,
"stack": true,
"steppedLine": false,
"targets": [
+ {
+ "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", node=~\"$node\", resource=\"memory\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "max capacity",
+ "legendLink": null,
+ "step": 10
+ },
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
@@ -11683,7 +12394,7 @@ items:
"title": "Memory Usage (w/o cache)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -11737,11 +12448,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -11779,6 +12493,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -11797,6 +12512,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -11815,6 +12531,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -11833,6 +12550,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -11851,6 +12569,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -11869,6 +12588,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -11887,6 +12607,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -11905,6 +12626,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #H",
@@ -11923,6 +12645,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "pod",
@@ -11959,7 +12682,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11968,7 +12691,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11977,7 +12700,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -11986,7 +12709,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12030,7 +12753,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -12085,7 +12808,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -12110,8 +12833,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -12137,8 +12860,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
- "refresh": 1,
+ "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -12187,6 +12910,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-k8s-resources-node
namespace: monitoring
- apiVersion: v1
@@ -12221,11 +12949,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12266,7 +12997,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@@ -12274,7 +13005,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@@ -12282,7 +13013,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@@ -12298,7 +13029,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -12352,11 +13083,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12380,7 +13114,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container)",
+ "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@@ -12403,7 +13137,7 @@ items:
"title": "CPU Throttling",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -12457,11 +13191,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12499,6 +13236,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -12517,6 +13255,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -12535,6 +13274,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -12553,6 +13293,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -12571,6 +13312,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -12589,6 +13331,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "container",
@@ -12616,7 +13359,7 @@ items:
],
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12625,7 +13368,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12634,7 +13377,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12643,7 +13386,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12652,7 +13395,7 @@ items:
"step": 10
},
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -12669,7 +13412,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -12724,11 +13467,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12750,7 +13496,7 @@ items:
"dashes": true,
"fill": 0,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
},
@@ -12760,7 +13506,7 @@ items:
"dashes": true,
"fill": 0,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
}
@@ -12771,7 +13517,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@@ -12779,7 +13525,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "requests",
@@ -12787,7 +13533,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "limits",
@@ -12800,10 +13546,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Memory Usage",
+ "title": "Memory Usage (WSS)",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -12857,11 +13603,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 5,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -12891,7 +13640,7 @@ items:
"type": "hidden"
},
{
- "alias": "Memory Usage",
+ "alias": "Memory Usage (WSS)",
"colorMode": null,
"colors": [
@@ -12899,6 +13648,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -12917,6 +13667,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -12935,6 +13686,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -12953,6 +13705,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -12971,6 +13724,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -12989,6 +13743,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -13007,6 +13762,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@@ -13025,6 +13781,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #H",
@@ -13043,6 +13800,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "container",
@@ -13070,7 +13828,7 @@ items:
],
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13079,7 +13837,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13088,7 +13846,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13097,7 +13855,7 @@ items:
"step": 10
},
{
- "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)",
+ "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13106,7 +13864,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
+ "expr": "sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13115,7 +13873,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "expr": "sum(container_memory_rss{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13124,7 +13882,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "expr": "sum(container_memory_cache{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13133,7 +13891,7 @@ items:
"step": 10
},
{
- "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
+ "expr": "sum(container_memory_swap{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -13150,7 +13908,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -13207,10 +13965,12 @@ items:
"id": 6,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13229,12 +13989,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -13250,7 +14010,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13281,19 +14041,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -13306,10 +14054,12 @@ items:
"id": 7,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13328,12 +14078,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -13349,7 +14099,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13386,7 +14136,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -13405,10 +14155,12 @@ items:
"id": 8,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13427,12 +14179,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -13448,7 +14200,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13463,7 +14215,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -13479,19 +14231,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -13504,10 +14244,12 @@ items:
"id": 9,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13526,12 +14268,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -13547,7 +14289,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13562,7 +14304,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -13584,7 +14326,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -13603,10 +14345,12 @@ items:
"id": 10,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13625,12 +14369,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -13646,7 +14390,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13661,7 +14405,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -13677,19 +14421,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -13702,10 +14434,12 @@ items:
"id": 11,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13724,12 +14458,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -13745,7 +14479,214 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Rate of Packets Dropped",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": -1,
+ "fill": 10,
+ "id": 12,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Reads",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "ceil(sum by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 13,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Reads",
+ "legendLink": null,
+ "step": 10
+ },
+ {
+ "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ThroughPut",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -13782,7 +14723,506 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Storage IO - Distribution(Pod - Read & Writes)",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": -1,
+ "fill": 10,
+ "id": 14,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{container}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS(Reads+Writes)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "id": 15,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{container}}",
+ "legendLink": null,
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ThroughPut(Read+Write)",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Storage IO - Distribution(Containers)",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "id": 16,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+
+ ],
+ "sort": {
+ "col": 4,
+ "desc": true
+ },
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "IOPS(Reads)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #A",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS(Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #B",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "IOPS(Reads + Writes)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": -1,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #C",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "Throughput(Read)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #D",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Throughput(Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #E",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Throughput(Read + Write)",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "Value #F",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "Bps"
+ },
+ {
+ "alias": "Container",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "linkTargetBlank": false,
+ "linkTooltip": "Drill down",
+ "linkUrl": "",
+ "pattern": "container",
+ "thresholds": [
+
+ ],
+ "type": "number",
+ "unit": "short"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "F",
+ "step": 10
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current Storage IO",
+ "tooltip": {
+ "shared": false,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "transform": "table",
+ "type": "table",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Storage IO - Distribution",
"titleSize": "h6"
}
],
@@ -13799,7 +15239,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -13824,8 +15264,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -13851,8 +15291,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
+ "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -13878,7 +15318,7 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
+ "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -13928,6 +15368,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-k8s-resources-pod
namespace: monitoring
- apiVersion: v1
@@ -13962,11 +15407,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -13990,7 +15438,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -14006,7 +15454,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -14060,11 +15508,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -14102,6 +15553,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -14120,6 +15572,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -14138,6 +15591,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -14156,6 +15610,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -14174,6 +15629,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -14192,8 +15648,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -14219,7 +15676,7 @@ items:
],
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14228,7 +15685,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14237,7 +15694,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14246,7 +15703,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14255,7 +15712,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14272,7 +15729,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -14327,11 +15784,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -14355,7 +15815,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -14371,7 +15831,7 @@ items:
"title": "Memory Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -14425,11 +15885,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -14467,6 +15930,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -14485,6 +15949,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -14503,6 +15968,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -14521,6 +15987,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -14539,6 +16006,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -14557,8 +16025,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -14584,7 +16053,7 @@ items:
],
"targets": [
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14593,7 +16062,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14602,7 +16071,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14611,7 +16080,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14620,7 +16089,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14637,7 +16106,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -14694,10 +16163,12 @@ items:
"id": 5,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -14735,6 +16206,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -14753,6 +16225,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -14771,6 +16244,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -14789,6 +16263,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -14807,6 +16282,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -14825,6 +16301,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -14843,8 +16320,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
+ "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -14870,7 +16348,7 @@ items:
],
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14879,7 +16357,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14888,7 +16366,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14897,7 +16375,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14906,7 +16384,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14915,7 +16393,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -14932,7 +16410,7 @@ items:
"title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -14970,7 +16448,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -14987,11 +16465,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15010,12 +16491,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15031,7 +16512,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15062,19 +16543,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -15085,11 +16554,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 7,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15108,12 +16580,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15129,7 +16601,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15166,7 +16638,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -15183,11 +16655,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15206,12 +16681,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15227,7 +16702,7 @@ items:
"title": "Average Container Bandwidth by Pod: Received",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15258,19 +16733,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -15281,11 +16744,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 9,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15304,12 +16770,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15325,7 +16791,7 @@ items:
"title": "Average Container Bandwidth by Pod: Transmitted",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15362,7 +16828,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Average Container Bandwidth by Pod",
"titleSize": "h6"
},
{
@@ -15379,11 +16845,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15402,12 +16871,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15423,7 +16892,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15438,7 +16907,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -15454,19 +16923,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -15477,11 +16934,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 11,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15500,12 +16960,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15521,7 +16981,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15536,7 +16996,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -15558,7 +17018,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -15575,11 +17035,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15598,12 +17061,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15619,7 +17082,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15634,7 +17097,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -15650,19 +17113,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -15673,11 +17124,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15696,12 +17150,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@@ -15717,7 +17171,7 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -15732,7 +17186,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -15754,7 +17208,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets Dropped",
"titleSize": "h6"
}
],
@@ -15771,7 +17225,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -15796,8 +17250,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -15823,35 +17277,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
- "text": "",
- "value": ""
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "workload",
- "options": [
-
- ],
- "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
- "refresh": 1,
+ "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -15877,8 +17304,35 @@ items:
"options": [
],
- "query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "workload",
+ "options": [
+
+ ],
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}, workload)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -15927,6 +17381,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-k8s-resources-workload
namespace: monitoring
- apiVersion: v1
@@ -15961,11 +17420,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 1,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -15986,8 +17448,9 @@ items:
"color": "#F2495C",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
},
@@ -15996,8 +17459,9 @@ items:
"color": "#FF9830",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
}
@@ -16008,7 +17472,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
@@ -16040,7 +17504,7 @@ items:
"title": "CPU Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -16094,11 +17558,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 2,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16136,6 +17603,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -16154,6 +17622,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -16172,6 +17641,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -16190,6 +17660,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -16208,6 +17679,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -16226,6 +17698,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -16244,8 +17717,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
+ "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"pattern": "workload",
"thresholds": [
@@ -16262,6 +17736,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "workload_type",
@@ -16289,7 +17764,7 @@ items:
],
"targets": [
{
- "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
+ "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16298,7 +17773,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16307,7 +17782,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16316,7 +17791,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16325,7 +17800,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16334,7 +17809,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16351,7 +17826,7 @@ items:
"title": "CPU Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -16406,11 +17881,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 3,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16431,8 +17909,9 @@ items:
"color": "#F2495C",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
},
@@ -16441,8 +17920,9 @@ items:
"color": "#FF9830",
"dashes": true,
"fill": 0,
+ "hiddenSeries": true,
"hideTooltip": true,
- "legend": false,
+ "legend": true,
"linewidth": 2,
"stack": false
}
@@ -16453,7 +17933,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}} - {{workload_type}}",
@@ -16485,7 +17965,7 @@ items:
"title": "Memory Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -16539,11 +18019,14 @@ items:
"datasource": "$datasource",
"fill": 1,
"id": 4,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16581,6 +18064,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 0,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -16599,6 +18083,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -16617,6 +18102,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -16635,6 +18121,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -16653,6 +18140,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -16671,6 +18159,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -16689,8 +18178,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
- "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
+ "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"pattern": "workload",
"thresholds": [
@@ -16707,6 +18197,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "workload_type",
@@ -16734,7 +18225,7 @@ items:
],
"targets": [
{
- "expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
+ "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16743,7 +18234,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16752,7 +18243,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16761,7 +18252,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16770,7 +18261,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16779,7 +18270,7 @@ items:
"step": 10
},
{
- "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
+ "expr": "sum(\n container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -16796,7 +18287,7 @@ items:
"title": "Memory Quota",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -16853,10 +18344,12 @@ items:
"id": 5,
"interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -16894,6 +18387,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -16912,6 +18406,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -16930,6 +18425,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@@ -16948,6 +18444,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@@ -16966,6 +18463,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@@ -16984,6 +18482,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@@ -17002,8 +18501,9 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": true,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down to pods",
- "linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type",
+ "linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type",
"pattern": "workload",
"thresholds": [
@@ -17020,6 +18520,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "workload_type",
@@ -17047,7 +18548,7 @@ items:
],
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17056,7 +18557,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17065,7 +18566,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17074,7 +18575,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17083,7 +18584,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17092,7 +18593,7 @@ items:
"step": 10
},
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -17109,7 +18610,7 @@ items:
"title": "Current Network Usage",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -17147,7 +18648,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Current Network Usage",
"titleSize": "h6"
},
{
@@ -17164,11 +18665,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 6,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17187,12 +18691,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17208,7 +18712,7 @@ items:
"title": "Receive Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17239,19 +18743,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17262,11 +18754,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 7,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17285,12 +18780,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17306,7 +18801,7 @@ items:
"title": "Transmit Bandwidth",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17343,7 +18838,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Bandwidth",
"titleSize": "h6"
},
{
@@ -17360,11 +18855,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 8,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17383,12 +18881,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17404,7 +18902,7 @@ items:
"title": "Average Container Bandwidth by Workload: Received",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17435,19 +18933,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17458,11 +18944,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 9,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17481,12 +18970,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17502,7 +18991,7 @@ items:
"title": "Average Container Bandwidth by Workload: Transmitted",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17539,7 +19028,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Average Container Bandwidth by Workload",
"titleSize": "h6"
},
{
@@ -17556,11 +19045,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 10,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17579,12 +19071,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17600,7 +19092,7 @@ items:
"title": "Rate of Received Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17615,7 +19107,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17631,19 +19123,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17654,11 +19134,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 11,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17677,12 +19160,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17698,7 +19181,7 @@ items:
"title": "Rate of Transmitted Packets",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17713,7 +19196,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17735,7 +19218,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets",
"titleSize": "h6"
},
{
@@ -17752,11 +19235,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 12,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17775,12 +19261,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17796,7 +19282,7 @@ items:
"title": "Rate of Received Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17811,7 +19297,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17827,19 +19313,7 @@ items:
"show": false
}
]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
+ },
{
"aliasColors": {
@@ -17850,11 +19324,14 @@ items:
"datasource": "$datasource",
"fill": 10,
"id": 13,
+ "interval": "1m",
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": true,
"show": true,
"total": false,
"values": false
@@ -17873,12 +19350,12 @@ items:
],
"spaceLength": 10,
- "span": 12,
+ "span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
@@ -17894,7 +19371,7 @@ items:
"title": "Rate of Transmitted Packets Dropped",
"tooltip": {
"shared": false,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -17909,7 +19386,7 @@ items:
},
"yaxes": [
{
- "format": "Bps",
+ "format": "pps",
"label": null,
"logBase": 1,
"max": null,
@@ -17931,7 +19408,7 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Network",
+ "title": "Rate of Packets Dropped",
"titleSize": "h6"
}
],
@@ -17948,7 +19425,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -17958,38 +19435,6 @@ items:
"regex": "",
"type": "datasource"
},
- {
- "allValue": null,
- "auto": false,
- "auto_count": 30,
- "auto_min": "10s",
- "current": {
- "text": "deployment",
- "value": "deployment"
- },
- "datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
- "hide": 0,
- "includeAll": false,
- "label": null,
- "multi": false,
- "name": "type",
- "options": [
-
- ],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "sort": 0,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
{
"allValue": null,
"current": {
@@ -18005,8 +19450,8 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
- "refresh": 1,
+ "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -18032,13 +19477,45 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
- "refresh": 1,
+ "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "auto": false,
+ "auto_count": 30,
+ "auto_min": "10s",
+ "current": {
+ "text": "deployment",
+ "value": "deployment"
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "type",
+ "options": [
+
+ ],
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
],
"tagsQuery": "",
"type": "query",
@@ -18082,6 +19559,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-k8s-resources-workloads-namespace
namespace: monitoring
- apiVersion: v1
@@ -18106,2367 +19588,2106 @@ items:
"id": null,
"links": [
+ ],
+ "panels": [
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+
+ ],
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Running Kubelets",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+
+ ],
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 3,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Running Pods",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+
+ ],
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 4,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Running Containers",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+
+ ],
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 12,
+ "y": 0
+ },
+ "id": 5,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Actual Volume Count",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+
+ ],
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 16,
+ "y": 0
+ },
+ "id": 6,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Desired Volume Count",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "links": [
+
+ ],
+ "mappings": [
+
+ ],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+
+ ]
+ },
+ "unit": "none"
+ }
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 4,
+ "x": 20,
+ "y": 0
+ },
+ "id": 7,
+ "links": [
+
+ ],
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "textMode": "auto"
+ },
+ "pluginVersion": "7",
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Config Error Count",
+ "transparent": false,
+ "type": "stat"
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 7
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_type}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 7
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_type}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation Error Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 14
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_type}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Operation duration 99th quantile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 21
+ },
+ "id": 11,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} pod",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} worker",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pod Start Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 21
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} pod",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} worker",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pod Start Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 28
+ },
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Operation Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 28
+ },
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Operation Error Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 35
+ },
+ "id": 15,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Operation Duration 99th quantile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 42
+ },
+ "id": 16,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{operation_type}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cgroup manager operation rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 42
+ },
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{operation_type}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cgroup manager 99th quantile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Pod lifecycle event generator",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 49
+ },
+ "id": 18,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 49
+ },
+ "id": 19,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist interval",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 56
+ },
+ "id": 20,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PLEG relist duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 63
+ },
+ "id": 21,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "2xx",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "3xx",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "4xx",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5xx",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RPC Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 70
+ },
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} {{verb}} {{url}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Request duration 99th quantile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 77
+ },
+ "id": 23,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 77
+ },
+ "id": 24,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 77
+ },
+ "id": 25,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Goroutines",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
],
"refresh": "10s",
"rows": [
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- },
- "id": 2,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Up",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 3,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Running Pods",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 4,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Running Container",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 5,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Actual Volume Count",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 6,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Desired Volume Count",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 7,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Config Error Count",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "min"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 8,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (operation_type, instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Operation Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 9,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Operation Error Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 10,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Operation duration 99th quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 11,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} pod",
- "refId": "A"
- },
- {
- "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} worker",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Pod Start Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 12,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} pod",
- "refId": "A"
- },
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} worker",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Pod Start Duration",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 13,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Storage Operation Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 14,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Storage Operation Error Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 15,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_name, volume_plugin, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Storage Operation Duration 99th quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 16,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{operation_type}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Cgroup manager operation rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 17,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, operation_type, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{operation_type}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Cgroup manager 99th quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "description": "Pod lifecycle event generator",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 18,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "PLEG relist rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 19,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "PLEG relist interval",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 20,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])) by (instance, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "PLEG relist duration",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 21,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "2xx",
- "refId": "A"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "3xx",
- "refId": "B"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "4xx",
- "refId": "C"
- },
- {
- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "5xx",
- "refId": "D"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "RPC Rate",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 22,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{verb}} {{url}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Request duration 99th quantile",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 23,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 24,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}[5m])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 25,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 4,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\", metrics_path=\"/metrics\",instance=~\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Goroutines",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- }
],
"schemaVersion": 14,
"style": "dark",
@@ -20481,7 +21702,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -20505,7 +21726,7 @@ items:
"options": [
],
- "query": "label_values(kube_pod_info, cluster)",
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -20525,13 +21746,13 @@ items:
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": null,
+ "label": "instance",
"multi": false,
"name": "instance",
"options": [
],
- "query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"}, instance)",
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -20581,2096 +21802,13 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-kubelet
namespace: monitoring
-- apiVersion: v1
- data:
- kubernetes-cluster-dashboard.json: |-
- {
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- }
- ]
- },
- "description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. ",
- "editable": true,
- "gnetId": 162,
- "graphTooltip": 1,
- "links": [
-
- ],
- "panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 0
- },
- "id": 4,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster memory usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
- "colors": [
- "rgba(0, 0, 0, 0)",
- "rgb(210, 1, 1)",
- "#890f02"
- ],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "percentunit",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 2,
- "w": 8,
- "x": 8,
- "y": 0
- },
- "id": 23,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "{job=\"kubelet\"}",
- "targets": [
- {
- "expr": "avg(up{job=\"kubelet\"}) BY (job)",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 1,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "1.1",
- "title": "Up Nodes",
- "type": "singlestat",
- "valueFontSize": "120%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 0
- },
- "id": 6,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "avg(100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster CPU usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "columns": [
-
- ],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fontSize": "90%",
- "gridPos": {
- "h": 5,
- "w": 8,
- "x": 8,
- "y": 2
- },
- "id": 25,
- "links": [
-
- ],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 2,
- "desc": false
- },
- "styles": [
- {
- "alias": "Time",
- "align": "auto",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
- {
- "alias": "Uptime",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Value",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "s"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/endpoint|job|namespace|pod|service/",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "instance",
- "preserveFormat": false,
- "sanitize": false,
- "thresholds": [
-
- ],
- "type": "string",
- "unit": "short"
- }
- ],
- "targets": [
- {
- "expr": "(time() - node_boot_time_seconds)",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A"
- }
- ],
- "title": "Node Uptime",
- "transform": "table",
- "transparent": true,
- "type": "table-old"
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 7
- },
- "id": 15,
- "panels": [
-
- ],
- "title": "Nodes",
- "type": "row"
- },
- {
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 0.85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "B",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "max"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "for": "0m",
- "frequency": "60s",
- "handler": 1,
- "name": "Memory Usage alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 8
- },
- "hiddenSeries": false,
- "id": 10,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "A"
- },
- {
- "expr": "(node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes))/node_memory_MemTotal_bytes",
- "format": "time_series",
- "hide": true,
- "intervalFactor": 1,
- "refId": "B"
- }
- ],
- "thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 0.85
- }
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Memory Usage",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "decbytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 90
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "15m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "max"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Usage alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 12,
- "y": 8
- },
- "hiddenSeries": false,
- "id": 11,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)",
- "format": "time_series",
- "intervalFactor": 3,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 90
- }
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "CPU Usage",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "decimals": null,
- "format": "percent",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "columns": [
-
- ],
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fontSize": "100%",
- "gridPos": {
- "h": 9,
- "w": 12,
- "x": 0,
- "y": 17
- },
- "id": 31,
- "links": [
-
- ],
- "pageSize": null,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 0,
- "desc": true
- },
- "styles": [
- {
- "alias": "Time",
- "align": "auto",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "link": false,
- "pattern": "Time",
- "type": "date"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "condition|container|daemonset|endpoint|namespace|node",
- "thresholds": [
-
- ],
- "type": "hidden",
- "unit": "short"
- },
- {
- "alias": "",
- "align": "auto",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [
-
- ],
- "type": "number",
- "unit": "short"
- }
- ],
- "targets": [
- {
- "expr": "ALERTS{alertstate=\"firing\"}",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
- "refId": "A"
- },
- {
- "expr": "ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"}",
- "format": "table",
- "hide": true,
- "intervalFactor": 1,
- "refId": "B"
- }
- ],
- "title": "Active Alerts",
- "transform": "table",
- "type": "table-old"
- },
- {
- "dashboardFilter": "",
- "dashboardTags": [
-
- ],
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "folderId": null,
- "gridPos": {
- "h": 9,
- "w": 5,
- "x": 12,
- "y": 17
- },
- "id": 27,
- "limit": 10,
- "links": [
-
- ],
- "nameFilter": "",
- "onlyAlertsOnDashboard": false,
- "show": "current",
- "sortOrder": 1,
- "stateFilter": [
-
- ],
- "title": "Alarms",
- "type": "alertlist"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
- "datasource": "prometheus",
- "decimals": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 7,
- "x": 17,
- "y": 17
- },
- "id": 7,
- "interval": null,
- "isNew": true,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "metric": "",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": "65, 90",
- "title": "Cluster Filesystem usage",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 1
- ],
- "type": "lt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "C",
- "5m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "avg"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "Node Down",
- "noDataState": "alerting",
- "notifications": [
-
- ]
- },
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 4,
- "w": 7,
- "x": 17,
- "y": 22
- },
- "hiddenSeries": false,
- "id": 29,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(up{job=\"kubelet\"}) BY (job)",
- "format": "time_series",
- "hide": true,
- "instant": false,
- "intervalFactor": 1,
- "legendFormat": "Up Nodes",
- "refId": "A"
- },
- {
- "expr": "count(up{job=\"kubelet\"})",
- "format": "time_series",
- "hide": true,
- "instant": false,
- "intervalFactor": 1,
- "legendFormat": "Total Nodes",
- "refId": "B"
- },
- {
- "expr": "avg(up{job=\"kubelet\"}) BY (job)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 1,
- "refId": "C"
- }
- ],
- "thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "lt",
- "value": 1
- }
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Up Nodes",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "alert": {
- "conditions": [
- {
- "evaluator": {
- "params": [
- 85
- ],
- "type": "gt"
- },
- "operator": {
- "type": "and"
- },
- "query": {
- "params": [
- "A",
- "1m",
- "now"
- ]
- },
- "reducer": {
- "params": [
-
- ],
- "type": "avg"
- },
- "type": "query"
- }
- ],
- "executionErrorState": "alerting",
- "frequency": "60s",
- "handler": 1,
- "name": "CPU Temperature alert",
- "noDataState": "no_data",
- "notifications": [
-
- ]
- },
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 6,
- "w": 24,
- "x": 0,
- "y": 26
- },
- "hiddenSeries": false,
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rpi_cpu_temperature_celsius",
- "format": "time_series",
- "intervalFactor": 5,
- "legendFormat": "{{instance}}",
- "refId": "A"
- }
- ],
- "thresholds": [
- {
- "colorMode": "critical",
- "fill": true,
- "line": true,
- "op": "gt",
- "value": 85
- }
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "CPU Temperature",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "celsius",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 32
- },
- "id": 17,
- "panels": [
-
- ],
- "title": "Pods",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 0,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 33
- },
- "hiddenSeries": false,
- "id": 3,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 270,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "topk(10,sum by (pod)(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) ))",
- "format": "time_series",
- "instant": false,
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod}}",
- "metric": "container_cpu",
- "refId": "A",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Pod CPU usage",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 40
- },
- "hiddenSeries": false,
- "id": 2,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 250,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod, image))",
- "format": "time_series",
- "hide": true,
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "metric": "container_memory_usage:sort_desc",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 1,
- "legendFormat": "{{ pod }}",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Pod memory usage",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 19,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": 550,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ pod_name }}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Sent Network Traffic per Container",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": "",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": "",
- "logBase": 10,
- "max": 8,
- "min": 0,
- "show": false
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 21,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": 150,
- "sort": "avg",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "null",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod=~\".+\"}[5m])) by (pod))",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "A",
- "step": 240
- },
- {
- "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])",
- "format": "time_series",
- "hide": true,
- "intervalFactor": 2,
- "legendFormat": "{{pod_name}}",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Received Network Traffic per Container",
- "tooltip": {
- "msResolution": true,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "prometheus",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 0,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 54
- },
- "hiddenSeries": false,
- "id": 8,
- "isNew": true,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "sideWidth": 220,
- "sort": "current",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "options": {
- "dataLinks": [
-
- ]
- },
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Receive Traffic",
- "metric": "network",
- "refId": "A",
- "step": 10
- },
- {
- "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))",
- "format": "time_series",
- "interval": "10s",
- "intervalFactor": 1,
- "legendFormat": "Transmit Traffic",
- "metric": "network",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Pod Network i/o",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "$$hashKey": "object:1163",
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "$$hashKey": "object:1164",
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- }
- ],
- "refresh": "10s",
- "schemaVersion": 25,
- "style": "dark",
- "tags": [
- "custom"
- ],
- "templating": {
- "list": [
-
- ]
- },
- "time": {
- "from": "now-3h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "browser",
- "title": "Kubernetes cluster monitoring (via Prometheus)",
- "version": 1
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-kubernetes-cluster-dashboard
- namespace: monitoring
- apiVersion: v1
data:
namespace-by-pod.json: |-
@@ -22828,7 +21966,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))",
"format": "time_series",
"instant": null,
"intervalFactor": 1,
@@ -22955,7 +22093,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution]))",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution]))",
"format": "time_series",
"instant": null,
"intervalFactor": 1,
@@ -23025,6 +22163,9 @@ items:
"id": 5,
"lines": true,
"linewidth": 1,
+ "links": [
+
+ ],
"minSpan": 24,
"nullPointMode": "null as zero",
"renderer": "flot",
@@ -23184,7 +22325,7 @@ items:
],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -23193,7 +22334,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -23202,7 +22343,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -23211,7 +22352,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -23220,7 +22361,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -23229,7 +22370,7 @@ items:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -23273,6 +22414,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -23316,7 +22458,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -23373,6 +22515,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -23416,7 +22559,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -23484,6 +22627,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -23527,7 +22671,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -23584,6 +22728,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -23627,7 +22772,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -23704,6 +22849,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -23747,7 +22893,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -23804,6 +22950,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -23847,7 +22994,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -23922,7 +23069,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -23932,6 +23079,32 @@ items:
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": ".+",
"auto": false,
@@ -23942,7 +23115,7 @@ items:
"value": "kube-system"
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
@@ -23951,8 +23124,8 @@ items:
"options": [
],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -24082,6 +23255,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-namespace-by-pod
namespace: monitoring
- apiVersion: v1
@@ -24146,6 +23324,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -24191,7 +23370,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@@ -24248,6 +23427,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -24293,7 +23473,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@@ -24396,6 +23576,9 @@ items:
"id": 5,
"lines": true,
"linewidth": 1,
+ "links": [
+
+ ],
"minSpan": 24,
"nullPointMode": "null as zero",
"renderer": "flot",
@@ -24591,7 +23774,7 @@ items:
],
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24600,7 +23783,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24609,7 +23792,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24618,7 +23801,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24627,7 +23810,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24636,7 +23819,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24645,7 +23828,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24654,7 +23837,7 @@ items:
"step": 10
},
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -24688,6 +23871,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -24733,7 +23917,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@@ -24790,6 +23974,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -24835,7 +24020,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ workload }}",
@@ -24922,6 +24107,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -24965,7 +24151,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@@ -25022,6 +24208,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -25065,7 +24252,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@@ -25133,6 +24320,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -25176,7 +24364,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@@ -25233,6 +24421,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -25276,7 +24465,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@@ -25353,6 +24542,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -25396,7 +24586,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@@ -25453,6 +24643,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -25496,7 +24687,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{workload}}",
@@ -25571,7 +24762,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -25581,6 +24772,32 @@ items:
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"auto": false,
@@ -25591,7 +24808,7 @@ items:
"value": "kube-system"
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -25600,8 +24817,8 @@ items:
"options": [
],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -25623,7 +24840,7 @@ items:
"value": "deployment"
},
"datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -25632,8 +24849,8 @@ items:
"options": [
],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\"}, workload_type)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@@ -25763,29 +24980,41 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-namespace-by-workload
namespace: monitoring
- apiVersion: v1
data:
node-cluster-rsrc-use.json: |-
{
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
"annotations": {
"list": [
]
},
- "editable": true,
+ "editable": false,
"gnetId": null,
- "graphTooltip": 0,
+ "graphTooltip": 1,
"hideControls": false,
+ "id": null,
"links": [
],
- "refresh": "10s",
+ "refresh": "30s",
"rows": [
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -25796,26 +25025,34 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 2,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
"lines": true,
- "linewidth": 0,
+ "linewidth": 1,
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
@@ -25825,12 +25062,11 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "(\n instance:node_cpu_utilisation:rate1m{job=\"node-exporter\"}\n*\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\"}))\n",
+ "expr": "((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=\"$cluster\"}))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
+ "legendFormat": "{{ instance }}",
+ "refId": "A"
}
],
"thresholds": [
@@ -25840,8 +25076,8 @@ items:
"timeShift": null,
"title": "CPU Utilisation",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -25859,17 +25095,17 @@ items:
"format": "percentunit",
"label": null,
"logBase": 1,
- "max": 1,
- "min": 0,
+ "max": null,
+ "min": null,
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -25882,875 +25118,20 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 10,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Saturation (load1 per CPU)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "CPU",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
+ "fillGradient": 0,
+ "gridPos": {
},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
"id": 3,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\"}\n/ scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Memory Saturation (Major Page Faults)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "rps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Memory",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/ Receive/",
- "stack": "A"
- },
- {
- "alias": "/ Transmit/",
- "stack": "B",
- "transform": "negative-Y"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Receive",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- },
- {
- "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Transmit",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Net Utilisation (Bytes Receive/Transmit)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "Bps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 6,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/ Receive/",
- "stack": "A"
- },
- {
- "alias": "/ Transmit/",
- "stack": "B",
- "transform": "negative-Y"
- }
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Receive",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- },
- {
- "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} Transmit",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Net Saturation (Drops Receive/Transmit)",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "rps",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Network",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 7,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{device}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk IO Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\"}))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}} {{device}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk IO Saturation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk IO",
- "titleSize": "h6"
- },
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 10,
- "id": 9,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 0,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 12,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum without (device) (\n max without (fstype, mountpoint) (\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"} - node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\"}\n )\n) \n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\"})))\n",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{instance}}",
- "legendLink": "/dashboard/file/node-rsrc-use.json",
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk Space Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": 1,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk Space",
- "titleSize": "h6"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
-
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "USE Method / Cluster",
- "uid": "3e97d1d02672cdd0861f4c97c64f89b2",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-node-cluster-rsrc-use
- namespace: monitoring
-- apiVersion: v1
- data:
- node-rsrc-use.json: |-
- {
- "annotations": {
- "list": [
-
- ]
- },
- "editable": true,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "links": [
-
- ],
- "refresh": "10s",
- "rows": [
- {
- "collapse": false,
- "height": "250px",
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
+ "rightSide": false,
"show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -26759,112 +25140,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance:node_cpu_utilisation:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Utilisation",
- "legendLink": null,
- "step": 10
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "CPU Utilisation",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Saturation",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -26874,8 +25169,8 @@ items:
"timeShift": null,
"title": "CPU Saturation (Load1 per CPU)",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -26894,16 +25189,16 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -26913,11 +25208,12 @@ items:
"repeatRowId": null,
"showTitle": true,
"title": "CPU",
- "titleSize": "h6"
+ "titleSize": "h6",
+ "type": "row"
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -26927,14 +25223,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 3,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 4,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -26943,26 +25246,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Memory",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -26972,8 +25275,8 @@ items:
"timeShift": null,
"title": "Memory Utilisation",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -26992,16 +25295,16 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -27013,14 +25316,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 4,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 5,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -27029,26 +25339,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance:node_vmstat_pgmajfault:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Major page faults",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -27058,8 +25368,8 @@ items:
"timeShift": null,
"title": "Memory Saturation (Major Page Faults)",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -27074,20 +25384,20 @@ items:
},
"yaxes": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
- },
- {
- "format": "short",
+ "format": "rds",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
+ },
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
]
}
@@ -27097,11 +25407,12 @@ items:
"repeatRowId": null,
"showTitle": true,
"title": "Memory",
- "titleSize": "h6"
+ "titleSize": "h6",
+ "type": "row"
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -27111,14 +25422,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 5,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 6,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -27127,11 +25445,12 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
{
"alias": "/Receive/",
@@ -27145,24 +25464,22 @@ items:
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance:node_network_receive_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Receive",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}} Receive",
+ "refId": "A"
},
{
- "expr": "instance:node_network_transmit_bytes_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Transmit",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}} Transmit",
+ "refId": "B"
}
],
"thresholds": [
@@ -27170,10 +25487,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Net Utilisation (Bytes Receive/Transmit)",
+ "title": "Network Utilisation (Bytes Receive/Transmit)",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -27196,12 +25513,12 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -27213,14 +25530,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 6,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 7,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -27229,42 +25553,41 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
{
- "alias": "/Receive/",
+ "alias": "/ Receive/",
"stack": "A"
},
{
- "alias": "/Transmit/",
+ "alias": "/ Transmit/",
"stack": "B",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance:node_network_receive_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Receive drops",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}} Receive",
+ "refId": "A"
},
{
- "expr": "instance:node_network_transmit_drop_excluding_lo:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=\"$cluster\"} != 0",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "Transmit drops",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}} Transmit",
+ "refId": "B"
}
],
"thresholds": [
@@ -27272,10 +25595,10 @@ items:
],
"timeFrom": null,
"timeShift": null,
- "title": "Net Saturation (Drops Receive/Transmit)",
+ "title": "Network Saturation (Drops Receive/Transmit)",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -27290,7 +25613,7 @@ items:
},
"yaxes": [
{
- "format": "rps",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -27298,12 +25621,12 @@ items:
"show": true
},
{
- "format": "short",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -27312,12 +25635,13 @@ items:
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
- "title": "Net",
- "titleSize": "h6"
+ "title": "Network",
+ "titleSize": "h6",
+ "type": "row"
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -27327,14 +25651,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 7,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 8,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -27343,26 +25674,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance_device:node_disk_io_time_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "(\n instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{device}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}} {{device}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -27372,8 +25703,8 @@ items:
"timeShift": null,
"title": "Disk IO Utilisation",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -27392,16 +25723,16 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
},
@@ -27413,14 +25744,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 8,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
- "show": true,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -27429,26 +25767,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "instance_device:node_disk_io_time_weighted_seconds:rate1m{job=\"node-exporter\", instance=\"$instance\"}",
+ "expr": "(\n instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}\n / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=\"$cluster\"}))\n) != 0\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{device}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}} {{device}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -27458,8 +25796,8 @@ items:
"timeShift": null,
"title": "Disk IO Saturation",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -27478,16 +25816,16 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -27497,11 +25835,12 @@ items:
"repeatRowId": null,
"showTitle": true,
"title": "Disk IO",
- "titleSize": "h6"
+ "titleSize": "h6",
+ "type": "row"
},
{
"collapse": false,
- "height": "250px",
+ "collapsed": false,
"panels": [
{
"aliasColors": {
@@ -27511,14 +25850,21 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
- "fill": 1,
- "id": 9,
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 10,
"legend": {
+ "alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
+ "rightSide": false,
"show": false,
+ "sideWidth": null,
"total": false,
"values": false
},
@@ -27527,26 +25873,26 @@ items:
"links": [
],
- "nullPointMode": "null as zero",
+ "nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
+ "repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 12,
- "stack": false,
+ "stack": true,
"steppedLine": false,
"targets": [
{
- "expr": "1 -\n(\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n/\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\"})\n)\n",
+ "expr": "sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=\"$cluster\"})))\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{device}}",
- "legendLink": null,
- "step": 10
+ "legendFormat": "{{instance}}",
+ "refId": "A"
}
],
"thresholds": [
@@ -27556,8 +25902,8 @@ items:
"timeShift": null,
"title": "Disk Space Utilisation",
"tooltip": {
- "shared": false,
- "sort": 0,
+ "shared": true,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -27576,16 +25922,16 @@ items:
"label": null,
"logBase": 1,
"max": null,
- "min": 0,
+ "min": null,
"show": true
},
{
- "format": "short",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": false
+ "show": true
}
]
}
@@ -27595,13 +25941,14 @@ items:
"repeatRowId": null,
"showTitle": true,
"title": "Disk Space",
- "titleSize": "h6"
+ "titleSize": "h6",
+ "type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
-
+ "node-exporter-mixin"
],
"templating": {
"list": [
@@ -27611,7 +25958,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -27624,22 +25971,22 @@ items:
{
"allValue": null,
"current": {
- "text": "prod",
- "value": "prod"
+ "text": "",
+ "value": ""
},
"datasource": "$datasource",
- "hide": 0,
+ "hide": 2,
"includeAll": false,
- "label": "instance",
+ "label": null,
"multi": false,
- "name": "instance",
+ "name": "cluster",
"options": [
],
- "query": "label_values(up{job=\"node-exporter\"}, instance)",
- "refresh": 1,
+ "query": "label_values(node_time_seconds, cluster)",
+ "refresh": 2,
"regex": "",
- "sort": 2,
+ "sort": 1,
"tagValuesQuery": "",
"tags": [
@@ -27679,18 +26026,22 @@ items:
"30d"
]
},
- "timezone": "UTC",
- "title": "USE Method / Node",
- "uid": "fac67cfbe174d3ef53eb473d73d9212f",
+ "timezone": "utc",
+ "title": "Node Exporter / USE Method / Cluster",
"version": 0
}
kind: ConfigMap
metadata:
- name: grafana-dashboard-node-rsrc-use
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-node-cluster-rsrc-use
namespace: monitoring
- apiVersion: v1
data:
- nodes.json: |-
+ node-rsrc-use.json: |-
{
"__inputs": [
@@ -27705,13 +26056,1091 @@ items:
},
"editable": false,
"gnetId": null,
- "graphTooltip": 0,
+ "graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [
],
- "refresh": "",
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Utilisation",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Saturation",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Saturation (Load1 per CPU)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Utilisation",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Major page Faults",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Saturation (Major Page Faults)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "rds",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/Receive/",
+ "stack": "A"
+ },
+ {
+ "alias": "/Transmit/",
+ "stack": "B",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Receive",
+ "refId": "A"
+ },
+ {
+ "expr": "instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Transmit",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Utilisation (Bytes Receive/Transmit)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/ Receive/",
+ "stack": "A"
+ },
+ {
+ "alias": "/ Transmit/",
+ "stack": "B",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Receive",
+ "refId": "A"
+ },
+ {
+ "expr": "instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Transmit",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Saturation (Drops Receive/Transmit)",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Network",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=\"$cluster\"} != 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IO Saturation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk IO",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 10,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": false,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=\"$cluster\"})\n ) != 0\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Space Utilisation",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk Space",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "node-exporter-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "",
+ "value": ""
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(node_time_seconds, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "instance",
+ "options": [
+
+ ],
+ "query": "label_values(node_exporter_build_info{job=\"node-exporter\", cluster=\"$cluster\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Node Exporter / USE Method / Node",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-node-rsrc-use
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ nodes-darwin.json: |-
+ {
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "refresh": "30s",
"rows": [
{
"collapse": false,
@@ -27726,6 +27155,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -27762,9 +27192,8 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "(\n (1 - rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[$__interval]))\n/ ignoring(cpu) group_left\n count without (cpu)( node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
+ "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
"format": "time_series",
- "interval": "1m",
"intervalFactor": 5,
"legendFormat": "{{cpu}}",
"refId": "A"
@@ -27777,7 +27206,7 @@ items:
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -27819,6 +27248,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 0,
+ "fillGradient": 0,
"gridPos": {
},
@@ -27890,7 +27320,7 @@ items:
"timeShift": null,
"title": "Load Average",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -27927,8 +27357,8 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
+ "showTitle": true,
+ "title": "CPU",
"titleSize": "h6",
"type": "row"
},
@@ -27945,6 +27375,1069 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 9,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Physical Memory",
+ "refId": "A"
+ },
+ {
+ "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Memory Used",
+ "refId": "B"
+ },
+ {
+ "expr": "(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "App Memory",
+ "refId": "C"
+ },
+ {
+ "expr": "node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Wired Memory",
+ "refId": "D"
+ },
+ {
+ "expr": "node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Compressed",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "max": 100,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)"
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ }
+ },
+ "gridPos": {
+
+ },
+ "id": 5,
+ "span": 3,
+ "targets": [
+ {
+ "expr": "(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\"})\n)\n*\n100\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": ""
+ }
+ ],
+ "title": "Memory Usage",
+ "transparent": false,
+ "type": "gauge"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Memory",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/ read| written/",
+ "yaxis": 1
+ },
+ {
+ "alias": "/ io time/",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} read",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} written",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} io time",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk I/O",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "datasource": "$datasource",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "yellow",
+ "value": 0.8
+ },
+ {
+ "color": "red",
+ "value": 0.9
+ }
+ ]
+ },
+ "unit": "decbytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Mounted on"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 260
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Size"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 93
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 72
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Available"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 88
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used, %"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "percentunit"
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "gradient-gauge"
+ },
+ {
+ "id": "max",
+ "value": 1
+ },
+ {
+ "id": "min",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+
+ },
+ "id": 7,
+ "span": 6,
+ "targets": [
+ {
+ "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": ""
+ },
+ {
+ "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 2,
+ "legendFormat": ""
+ }
+ ],
+ "title": "Disk Space Usage",
+ "transformations": [
+ {
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Value #A": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "Value #B": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "mountpoint": {
+ "aggregations": [
+
+ ],
+ "operation": "groupby"
+ }
+ }
+ }
+ },
+ {
+ "id": "merge",
+ "options": {
+
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used",
+ "binary": {
+ "left": "Value #A (lastNotNull)",
+ "operator": "-",
+ "reducer": "sum",
+ "right": "Value #B (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used, %",
+ "binary": {
+ "left": "Used",
+ "operator": "/",
+ "reducer": "sum",
+ "right": "Value #A (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+
+ },
+ "indexByName": {
+
+ },
+ "renameByName": {
+ "Value #A (lastNotNull)": "Size",
+ "Value #B (lastNotNull)": "Available",
+ "mountpoint": "Mounted on"
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {
+
+ },
+ "sort": [
+ {
+ "field": "Mounted on"
+ }
+ ]
+ }
+ }
+ ],
+ "transparent": false,
+ "type": "table"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disk",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Network received (bits/s)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Received",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Network transmitted (bits/s)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Transmitted",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Network",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "node-exporter-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [
+
+ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Instance",
+ "multi": false,
+ "name": "instance",
+ "options": [
+
+ ],
+ "query": "label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "utc",
+ "title": "Node Exporter / MacOS",
+ "version": 0
+ }
+ kind: ConfigMap
+ metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana-dashboard-nodes-darwin
+ namespace: monitoring
+- apiVersion: v1
+ data:
+ nodes.json: |-
+ {
+ "__inputs": [
+
+ ],
+ "__requires": [
+
+ ],
+ "annotations": {
+ "list": [
+
+ ]
+ },
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [
+
+ ],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"})\n)\n",
+ "format": "time_series",
+ "intervalFactor": 5,
+ "legendFormat": "{{cpu}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [
+
+ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "1m load average",
+ "refId": "A"
+ },
+ {
+ "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "5m load average",
+ "refId": "B"
+ },
+ {
+ "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "15m load average",
+ "refId": "C"
+ },
+ {
+ "expr": "count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", mode=\"idle\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "logical cores",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [
+
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Average",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "CPU",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": {
+
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -28016,7 +28509,7 @@ items:
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -28050,95 +28543,53 @@ items:
]
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(245, 54, 54, 0.9)"
- ],
"datasource": "$datasource",
- "format": "percent",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "fieldConfig": {
+ "defaults": {
+ "max": 100,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)"
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ }
},
"gridPos": {
},
"id": 5,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
"span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
"targets": [
{
- "expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n",
+ "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
+ "legendFormat": ""
}
],
- "thresholds": "80, 90",
"title": "Memory Usage",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
+ "transparent": false,
+ "type": "gauge"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
+ "showTitle": true,
+ "title": "Memory",
"titleSize": "h6",
"type": "row"
},
@@ -28155,6 +28606,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 0,
+ "fillGradient": 0,
"gridPos": {
},
@@ -28198,26 +28650,23 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
+ "expr": "rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
"format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{device}} read",
"refId": "A"
},
{
- "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
+ "expr": "rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
"format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{device}} written",
"refId": "B"
},
{
- "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\"}[$__interval])",
+ "expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])",
"format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{device}} io time",
"refId": "C"
}
@@ -28229,7 +28678,7 @@ items:
"timeShift": null,
"title": "Disk I/O",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -28245,7 +28694,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "Bps",
"label": null,
"logBase": 1,
"max": null,
@@ -28253,7 +28702,7 @@ items:
"show": true
},
{
- "format": "s",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -28263,117 +28712,230 @@ items:
]
},
{
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
"datasource": "$datasource",
- "fill": 1,
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+
+ },
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green"
+ },
+ {
+ "color": "yellow",
+ "value": 0.8
+ },
+ {
+ "color": "red",
+ "value": 0.9
+ }
+ ]
+ },
+ "unit": "decbytes"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Mounted on"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 260
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Size"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 93
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 72
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Available"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": 88
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Used, %"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "percentunit"
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "gradient-gauge"
+ },
+ {
+ "id": "max",
+ "value": 1
+ },
+ {
+ "id": "min",
+ "value": 0
+ }
+ ]
+ }
+ ]
+ },
"gridPos": {
},
"id": 7,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
- {
- "alias": "used",
- "color": "#E0B400"
- },
- {
- "alias": "available",
- "color": "#73BF69"
- }
- ],
- "spaceLength": 10,
"span": 6,
- "stack": true,
- "steppedLine": false,
"targets": [
{
- "expr": "sum(\n max by (device) (\n node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n",
- "format": "time_series",
+ "expr": "max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "used",
- "refId": "A"
+ "legendFormat": ""
},
{
- "expr": "sum(\n max by (device) (\n node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\"}\n )\n)\n",
- "format": "time_series",
+ "expr": "max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", fstype!=\"\", mountpoint!=\"\"})\n",
+ "format": "table",
+ "instant": true,
"intervalFactor": 2,
- "legendFormat": "available",
- "refId": "B"
+ "legendFormat": ""
}
],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
"title": "Disk Space Usage",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
+ "transformations": [
{
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "id": "groupBy",
+ "options": {
+ "fields": {
+ "Value #A": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "Value #B": {
+ "aggregations": [
+ "lastNotNull"
+ ],
+ "operation": "aggregate"
+ },
+ "mountpoint": {
+ "aggregations": [
+
+ ],
+ "operation": "groupby"
+ }
+ }
+ }
},
{
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": 0,
- "show": true
+ "id": "merge",
+ "options": {
+
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used",
+ "binary": {
+ "left": "Value #A (lastNotNull)",
+ "operator": "-",
+ "reducer": "sum",
+ "right": "Value #B (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "calculateField",
+ "options": {
+ "alias": "Used, %",
+ "binary": {
+ "left": "Used",
+ "operator": "/",
+ "reducer": "sum",
+ "right": "Value #A (lastNotNull)"
+ },
+ "mode": "binary",
+ "reduce": {
+ "reducer": "sum"
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+
+ },
+ "indexByName": {
+
+ },
+ "renameByName": {
+ "Value #A (lastNotNull)": "Size",
+ "Value #B (lastNotNull)": "Available",
+ "mountpoint": "Mounted on"
+ }
+ }
+ },
+ {
+ "id": "sortBy",
+ "options": {
+ "fields": {
+
+ },
+ "sort": [
+ {
+ "field": "Mounted on"
+ }
+ ]
+ }
}
- ]
+ ],
+ "transparent": false,
+ "type": "table"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
+ "showTitle": true,
+ "title": "Disk",
"titleSize": "h6",
"type": "row"
},
@@ -28389,7 +28951,9 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "description": "Network received (bits/s)",
"fill": 0,
+ "fillGradient": 0,
"gridPos": {
},
@@ -28426,10 +28990,9 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
+ "expr": "rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
"format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{device}}",
"refId": "A"
}
@@ -28441,7 +29004,7 @@ items:
"timeShift": null,
"title": "Network Received",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -28457,7 +29020,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "bps",
"label": null,
"logBase": 1,
"max": null,
@@ -28465,7 +29028,7 @@ items:
"show": true
},
{
- "format": "bytes",
+ "format": "bps",
"label": null,
"logBase": 1,
"max": null,
@@ -28482,7 +29045,9 @@ items:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
+ "description": "Network transmitted (bits/s)",
"fill": 0,
+ "fillGradient": 0,
"gridPos": {
},
@@ -28519,10 +29084,9 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__interval])",
+ "expr": "rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!=\"lo\"}[$__rate_interval]) * 8",
"format": "time_series",
- "interval": "1m",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{device}}",
"refId": "A"
}
@@ -28534,7 +29098,7 @@ items:
"timeShift": null,
"title": "Network Transmitted",
"tooltip": {
- "shared": false,
+ "shared": true,
"sort": 0,
"value_type": "individual"
},
@@ -28550,7 +29114,7 @@ items:
},
"yaxes": [
{
- "format": "bytes",
+ "format": "bps",
"label": null,
"logBase": 1,
"max": null,
@@ -28558,7 +29122,7 @@ items:
"show": true
},
{
- "format": "bytes",
+ "format": "bps",
"label": null,
"logBase": 1,
"max": null,
@@ -28571,8 +29135,8 @@ items:
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
+ "showTitle": true,
+ "title": "Network",
"titleSize": "h6",
"type": "row"
}
@@ -28580,17 +29144,17 @@ items:
"schemaVersion": 14,
"style": "dark",
"tags": [
-
+ "node-exporter-mixin"
],
"templating": {
"list": [
{
"current": {
- "text": "Prometheus",
- "value": "Prometheus"
+ "text": "default",
+ "value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -28608,13 +29172,13 @@ items:
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
- "label": null,
+ "label": "Instance",
"multi": false,
"name": "instance",
"options": [
],
- "query": "label_values(node_exporter_build_info{job=\"node-exporter\"}, instance)",
+ "query": "label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 0,
@@ -28657,13 +29221,17 @@ items:
"30d"
]
},
- "timezone": "UTC",
- "title": "Nodes",
- "uid": "fa49a4706d07a042595b664c87fb33ea",
+ "timezone": "utc",
+ "title": "Node Exporter / Nodes",
"version": 0
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-nodes
namespace: monitoring
- apiVersion: v1
@@ -28704,17 +29272,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 2,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -28740,14 +29310,14 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n",
+ "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Used Space",
"refId": "A"
},
{
- "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n",
+ "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Free Space",
@@ -28816,7 +29386,11 @@ items:
},
"id": 3,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -28855,7 +29429,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
+ "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -28900,17 +29474,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -28936,14 +29512,14 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n",
+ "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Used inodes",
"refId": "A"
},
{
- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n",
+ "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": " Free inodes",
@@ -29012,7 +29588,11 @@ items:
},
"id": 5,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -29051,7 +29631,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
+ "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -29097,7 +29677,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -29121,7 +29701,7 @@ items:
"options": [
],
- "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)",
+ "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", metrics_path=\"/metrics\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -29223,6 +29803,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-persistentvolumesusage
namespace: monitoring
- apiVersion: v1
@@ -29382,7 +29967,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
"format": "time_series",
"instant": null,
"intervalFactor": 1,
@@ -29509,7 +30094,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution]))",
"format": "time_series",
"instant": null,
"intervalFactor": 1,
@@ -29562,6 +30147,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -29605,7 +30191,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29662,6 +30248,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -29705,7 +30292,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29773,6 +30360,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -29816,7 +30404,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29873,6 +30461,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -29916,7 +30505,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -29993,6 +30582,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -30036,7 +30626,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -30093,6 +30683,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 10,
"w": 12,
@@ -30136,7 +30727,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
+ "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$interval:$resolution])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -30211,7 +30802,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -30221,6 +30812,32 @@ items:
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": ".+",
"auto": false,
@@ -30231,7 +30848,7 @@ items:
"value": "kube-system"
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
@@ -30240,8 +30857,8 @@ items:
"options": [
],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -30263,7 +30880,7 @@ items:
"value": ""
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)",
+ "definition": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -30272,8 +30889,8 @@ items:
"options": [
],
- "query": "label_values(container_network_receive_packets_total{namespace=~\"$namespace\"}, pod)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -30403,4806 +31020,13 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-pod-total
namespace: monitoring
-- apiVersion: v1
- data:
- prometheus-dashboard.json: |-
- {
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": "-- Grafana --",
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- },
- {
- "datasource": "$datasource",
- "enable": true,
- "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)",
- "hide": false,
- "iconColor": "rgb(250, 44, 18)",
- "limit": 100,
- "name": "downage",
- "showIn": 0,
- "step": "30s",
- "tagKeys": "instance",
- "textFormat": "prometheus down",
- "titleFormat": "Downage",
- "type": "alert"
- },
- {
- "datasource": "$datasource",
- "enable": true,
- "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds[10m])) by (instance)",
- "hide": false,
- "iconColor": "#fceaca",
- "limit": 100,
- "name": "Reload",
- "showIn": 0,
- "step": "5m",
- "tagKeys": "instance",
- "tags": [
-
- ],
- "titleFormat": "Reload",
- "type": "tags"
- }
- ]
- },
- "description": "Dashboard for monitoring of Prometheus v2.x.x",
- "editable": true,
- "gnetId": 3681,
- "graphTooltip": 1,
- "id": 4,
- "iteration": 1596721016726,
- "links": [
- {
- "icon": "info",
- "tags": [
-
- ],
- "targetBlank": true,
- "title": "Dashboard's Github ",
- "tooltip": "Github repo of this dashboard",
- "type": "link",
- "url": "https://github.com/FUSAKLA/Prometheus2-grafana-dashboard"
- },
- {
- "icon": "doc",
- "tags": [
-
- ],
- "targetBlank": true,
- "title": "Prometheus Docs",
- "tooltip": "",
- "type": "link",
- "url": "http://prometheus.io/docs/introduction/overview/"
- }
- ],
- "panels": [
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 0
- },
- "id": 55,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Header instance info",
- "type": "row"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#bf1b00"
- ],
- "datasource": "$datasource",
- "decimals": 1,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "s",
- "gauge": {
- "maxValue": 1000000,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 4,
- "x": 0,
- "y": 1
- },
- "id": 41,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "min(time() - process_start_time_seconds{instance=\"$instance\"})",
- "format": "time_series",
- "instant": false,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Uptime",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": true,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#bf1b00"
- ],
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "short",
- "gauge": {
- "maxValue": 1000000,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 8,
- "x": 4,
- "y": 1
- },
- "id": 42,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "prometheus_tsdb_head_series{instance=\"localhost:9090\", job=\"prometheus\"}",
- "targets": [
- {
- "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
- "format": "time_series",
- "instant": false,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "500000,800000,1000000",
- "title": "Total count of time series",
- "type": "singlestat",
- "valueFontSize": "150%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 3,
- "x": 12,
- "y": 1
- },
- "id": 48,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "version",
- "targets": [
- {
- "expr": "prometheus_build_info{instance=\"$instance\"}",
- "format": "table",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Version",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "first"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "decimals": 2,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "ms",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 4,
- "x": 15,
- "y": 1
- },
- "id": 49,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "{instance=\"localhost:9090\", job=\"prometheus\"}",
- "targets": [
- {
- "expr": "prometheus_tsdb_head_max_time{instance=\"$instance\"} - prometheus_tsdb_head_min_time{instance=\"$instance\"}",
- "format": "time_series",
- "instant": true,
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Actual head block length",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "content": "
",
- "datasource": null,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "gridPos": {
- "h": 5,
- "w": 2,
- "x": 19,
- "y": 1
- },
- "height": "",
- "id": 50,
- "links": [
-
- ],
- "mode": "html",
- "options": {
- "content": "
",
- "mode": "html"
- },
- "pluginVersion": "7.1.0",
- "title": "",
- "transparent": true,
- "type": "text"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": true,
- "colors": [
- "#e6522c",
- "rgba(237, 129, 40, 0.89)",
- "#299c46"
- ],
- "datasource": "$datasource",
- "decimals": 1,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 5,
- "w": 3,
- "x": 21,
- "y": 1
- },
- "id": 52,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "2",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "10,20",
- "title": "",
- "transparent": true,
- "type": "singlestat",
- "valueFontSize": "200%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 6
- },
- "id": 56,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Main info",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 7
- },
- "hiddenSeries": false,
- "id": 15,
- "legend": {
- "avg": true,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "max(prometheus_engine_query_duration_seconds{instance=\"$instance\"}) by (instance, slice)",
- "format": "time_series",
- "intervalFactor": 1,
- "legendFormat": "max duration for {{slice}}",
- "metric": "prometheus_local_storage_rushed_mode",
- "refId": "A",
- "step": 900
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Query elapsed time",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": "",
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 7
- },
- "hiddenSeries": false,
- "id": 17,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_tsdb_head_series_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "created on {{ instance }}",
- "metric": "prometheus_local_storage_maintain_series_duration_seconds_count",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_tsdb_head_series_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "removed on {{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Head series created/deleted",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 7
- },
- "hiddenSeries": false,
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "exceeded_sample_limit on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "duplicate_timestamp on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "B",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "out_of_bounds on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "C",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "out_of_order on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "D",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "rule_evaluation_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "G",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "tsdb_compactions_failed on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "K",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "tsdb_reloads_failures on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "L",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "head_series_not_found on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "N",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "evaluator_iterations_missed on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "O",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "evaluator_iterations_skipped on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "P",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Prometheus errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 14
- },
- "id": 57,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Scrape & rule duration",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "description": "",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "grid": {
-
- },
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 15
- },
- "hiddenSeries": false,
- "id": 25,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": true,
- "min": false,
- "show": false,
- "sort": "max",
- "sortDesc": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 2,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_target_interval_length_seconds{instance=\"$instance\",quantile=\"0.99\"} - $scrape_interval",
- "format": "time_series",
- "interval": "2m",
- "intervalFactor": 1,
- "legendFormat": "{{instance}}",
- "metric": "",
- "refId": "A",
- "step": 300
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Scrape delay (counts with 1m scrape interval)",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 15
- },
- "hiddenSeries": false,
- "id": 14,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "Queue length",
- "yaxis": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_evaluator_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Queue length",
- "metric": "prometheus_local_storage_indexing_queue_length",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Rule evaulation duration",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 22
- },
- "id": 58,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Requests & queries",
- "type": "row"
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 18,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(http_requests_total{instance=\"$instance\"}[$aggregation_interval])) by (instance, handler) > 0",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ handler }} on {{ instance }}",
- "metric": "",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Request count",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 16,
- "legend": {
- "avg": false,
- "current": false,
- "hideEmpty": true,
- "hideZero": true,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "max(sum(http_request_duration_microseconds{instance=\"$instance\"}) by (instance, handler, quantile)) by (instance, handler) > 0",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "{{ handler }} on {{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Request duration per handler",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "µs",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 19,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(http_request_size_bytes{instance=\"$instance\", quantile=\"0.99\"}[$aggregation_interval])) by (instance, handler) > 0",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "{{ handler }} in {{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Request size by handler",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Allocated bytes": "#F9BA8F",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max count collector": "#bf1b00",
- "Max count harvester": "#bf1b00",
- "Max to persist": "#3F6833",
- "RSS": "#890F02"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 23
- },
- "hiddenSeries": false,
- "id": 8,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/Max.*/",
- "fill": 0,
- "linewidth": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_engine_queries{instance=\"$instance\"}) by (instance, handler)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Current count ",
- "metric": "last",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(prometheus_engine_queries_concurrent_max{instance=\"$instance\"}) by (instance, handler)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Max count",
- "metric": "last",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Cont of concurent queries",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 30
- },
- "id": 59,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Alerting",
- "type": "row"
- },
- {
- "aliasColors": {
- "Alert queue capacity on o collector": "#bf1b00",
- "Alert queue capacity on o harvester": "#bf1b00",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 31
- },
- "hiddenSeries": false,
- "id": 20,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/.*capacity.*/",
- "fill": 0,
- "linewidth": 2
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_notifications_queue_capacity{instance=\"$instance\"})by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Alert queue capacity ",
- "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(prometheus_notifications_queue_length{instance=\"$instance\"})by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Alert queue size on ",
- "metric": "prometheus_local_storage_checkpoint_last_size_bytes",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Alert queue size",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 31
- },
- "hiddenSeries": false,
- "id": 21,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_notifications_alertmanagers_discovered{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Checkpoint chunks written/s",
- "metric": "prometheus_local_storage_checkpoint_series_chunks_written_sum",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Count of discovered alertmanagers",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 31
- },
- "hiddenSeries": false,
- "id": 39,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_notifications_dropped_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "notifications_dropped on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "F",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_rule_evaluation_failures_total{rule_type=\"alerting\",instance=\"$instance\"}[$aggregation_interval])) by (rule_type,instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "rule_evaluation_failures on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Alerting errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 38
- },
- "id": 60,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Service discovery",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 43,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"consul\", instance=\"$instance\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Consul target sync count",
- "refId": "A",
- "step": 240
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Consul SD sync count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 44,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"marathon\", instance=\"$instance\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Marathon target sync count",
- "refId": "A",
- "step": 240
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Marathon SD sync count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 45,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "increase(prometheus_target_sync_length_seconds_count{scrape_job=\"kubernetes\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Count of target synces",
- "refId": "A",
- "step": 240
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Kubernetes SD sync count",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 39
- },
- "hiddenSeries": false,
- "id": 46,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "exceeded_sample_limit on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "A",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_file_read_errors_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_file_read_error on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "E",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_consul_rpc_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "H",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_marathon_refresh_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "I",
- "step": 1800
- },
- {
- "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "sd_openstack_refresh_failure on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "J",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Service discovery errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 46
- },
- "id": 61,
- "panels": [
-
- ],
- "repeat": null,
- "title": "TSDB stats",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 36,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_tsdb_reloads_total{instance=\"$instance\"}[30m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Reloaded block from disk",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_blocks_loaded{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Loaded data blocks",
- "metric": "prometheus_local_storage_memory_chunkdescs",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Loaded data blocks",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "prometheus_tsdb_head_series{instance=\"$instance\"}",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Time series count",
- "metric": "prometheus_local_storage_memory_series",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Time series total count",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 47
- },
- "hiddenSeries": false,
- "id": 1,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "samples/s {{instance}}",
- "metric": "prometheus_local_storage_ingested_samples_total",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Samples Appended per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": "",
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 54
- },
- "id": 62,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Head block stats",
- "type": "row"
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833",
- "To persist": "#9AC48A"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 55
- },
- "hiddenSeries": false,
- "id": 2,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/Max.*/",
- "fill": 0
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_head_chunks{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Head chunk count",
- "metric": "prometheus_local_storage_memory_chunks",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Head chunks count",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 55
- },
- "hiddenSeries": false,
- "id": 35,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "max(prometheus_tsdb_head_max_time{instance=\"$instance\"}) by (instance) - min(prometheus_tsdb_head_min_time{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Length of head block",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "ms",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 55
- },
- "hiddenSeries": false,
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(prometheus_tsdb_head_chunks_created_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "created on {{ instance }}",
- "refId": "B"
- },
- {
- "expr": "sum(rate(prometheus_tsdb_head_chunks_removed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) * -1",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "deleted on {{ instance }}",
- "refId": "C"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Head Chunks Created/Deleted per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 62
- },
- "id": 63,
- "panels": [
-
- ],
- "repeat": null,
- "title": "Data maintenance",
- "type": "row"
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 0,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 33,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(prometheus_tsdb_compaction_duration_sum{instance=\"$instance\"}[30m]) / increase(prometheus_tsdb_compaction_duration_count{instance=\"$instance\"}[30m])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ instance }}",
- "refId": "B"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Compaction duration",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 6,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 34,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_head_gc_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ quantile }} on {{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Go Garbage collection duration",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 12,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 37,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(prometheus_tsdb_wal_truncate_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ quantile }} on {{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "WAL truncate duration seconds",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 6,
- "x": 18,
- "y": 63
- },
- "hiddenSeries": false,
- "id": 38,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "connected",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(tsdb_wal_fsync_duration_seconds{instance=\"$instance\"}) by (instance, quantile)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{ quantile }} {{ instance }}",
- "refId": "A"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "WAL fsync duration seconds",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 70
- },
- "id": 64,
- "panels": [
-
- ],
- "repeat": null,
- "title": "RAM&CPU",
- "type": "row"
- },
- {
- "aliasColors": {
- "Allocated bytes": "#7EB26D",
- "Allocated bytes - 1m max": "#BF1B00",
- "Allocated bytes - 1m min": "#BF1B00",
- "Allocated bytes - 5m max": "#BF1B00",
- "Allocated bytes - 5m min": "#BF1B00",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833",
- "RSS": "#447EBC"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "decimals": null,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 71
- },
- "hiddenSeries": false,
- "id": 6,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "/-/",
- "fill": 0
- },
- {
- "alias": "collector heap size",
- "color": "#E0752D",
- "fill": 0,
- "linewidth": 2
- },
- {
- "alias": "collector kubernetes memory limit",
- "color": "#BF1B00",
- "fill": 0,
- "linewidth": 3
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(process_resident_memory_bytes{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Total resident memory - {{instance}}",
- "metric": "process_resident_memory_bytes",
- "refId": "B",
- "step": 1800
- },
- {
- "expr": "sum(go_memstats_alloc_bytes{instance=\"$instance\"}) by (instance)",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Total llocated bytes - {{instance}}",
- "metric": "go_memstats_alloc_bytes",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Memory",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
- "Allocated bytes": "#F9BA8F",
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833",
- "RSS": "#890F02"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 71
- },
- "hiddenSeries": false,
- "id": 7,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "rate(go_memstats_alloc_bytes_total{instance=\"$instance\"}[$aggregation_interval])",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Allocated Bytes/s",
- "metric": "go_memstats_alloc_bytes",
- "refId": "A",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Allocations per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "decimals": 2,
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 71
- },
- "hiddenSeries": false,
- "id": 9,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "hideEmpty": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(rate(process_cpu_seconds_total{instance=\"$instance\"}[$aggregation_interval])) by (instance)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "CPU/s",
- "metric": "prometheus_local_storage_ingested_samples_total",
- "refId": "B",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "CPU per second",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
- "avg"
- ]
- },
- "yaxes": [
- {
- "format": "none",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "collapsed": true,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 78
- },
- "id": 68,
- "panels": [
- {
- "aliasColors": {
- "Chunks": "#1F78C1",
- "Chunks to persist": "#508642",
- "Max chunks": "#052B51",
- "Max to persist": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "editable": true,
- "error": false,
- "fieldConfig": {
- "defaults": {
- "custom": {
-
- }
- },
- "overrides": [
-
- ]
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 79
- },
- "hiddenSeries": false,
- "id": 47,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pluginVersion": "7.1.2",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=\"$instance\"}[$aggregation_interval])) by (instance) > 0",
- "format": "time_series",
- "hide": false,
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "conntrack_dialer_conn_failed on {{ instance }}",
- "metric": "prometheus_local_storage_chunk_ops_total",
- "refId": "M",
- "step": 1800
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeRegions": [
-
- ],
- "timeShift": null,
- "title": "Net errors",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- }
- ],
- "repeat": null,
- "title": "Contrac errors",
- "type": "row"
- }
- ],
- "refresh": "5m",
- "schemaVersion": 26,
- "style": "dark",
- "tags": [
- "custom"
- ],
- "templating": {
- "list": [
- {
- "auto": true,
- "auto_count": 30,
- "auto_min": "2m",
- "current": {
- "selected": false,
- "text": "auto",
- "value": "$__auto_interval_aggregation_interval"
- },
- "hide": 0,
- "label": "aggregation intarval",
- "name": "aggregation_interval",
- "options": [
- {
- "selected": true,
- "text": "auto",
- "value": "$__auto_interval_aggregation_interval"
- },
- {
- "selected": false,
- "text": "1m",
- "value": "1m"
- },
- {
- "selected": false,
- "text": "10m",
- "value": "10m"
- },
- {
- "selected": false,
- "text": "30m",
- "value": "30m"
- },
- {
- "selected": false,
- "text": "1h",
- "value": "1h"
- },
- {
- "selected": false,
- "text": "6h",
- "value": "6h"
- },
- {
- "selected": false,
- "text": "12h",
- "value": "12h"
- },
- {
- "selected": false,
- "text": "1d",
- "value": "1d"
- },
- {
- "selected": false,
- "text": "7d",
- "value": "7d"
- },
- {
- "selected": false,
- "text": "14d",
- "value": "14d"
- },
- {
- "selected": false,
- "text": "30d",
- "value": "30d"
- }
- ],
- "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
- "refresh": 2,
- "skipUrlSync": false,
- "type": "interval"
- },
- {
- "allValue": null,
- "current": {
- "selected": false,
- "text": "localhost:9090",
- "value": "localhost:9090"
- },
- "datasource": "$datasource",
- "definition": "",
- "hide": 0,
- "includeAll": false,
- "label": "Instance",
- "multi": false,
- "name": "instance",
- "options": [
-
- ],
- "query": "label_values(prometheus_build_info, instance)",
- "refresh": 2,
- "regex": "",
- "skipUrlSync": false,
- "sort": 2,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "current": {
- "text": "60",
- "value": "60"
- },
- "hide": 0,
- "label": "Scrape interval seconds",
- "name": "scrape_interval",
- "options": [
- {
- "text": "60",
- "value": "60"
- }
- ],
- "query": "60",
- "skipUrlSync": false,
- "type": "constant"
- },
- {
- "current": {
- "selected": false,
- "text": "Prometheus",
- "value": "Prometheus"
- },
- "hide": 0,
- "includeAll": false,
- "label": "Prometheus datasource",
- "multi": false,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "type": "datasource"
- },
- {
- "current": {
- "selected": false,
- "text": "No data sources found",
- "value": ""
- },
- "hide": 0,
- "includeAll": false,
- "label": "InfluxDB datasource",
- "multi": false,
- "name": "influx_datasource",
- "options": [
-
- ],
- "query": "influxdb",
- "refresh": 1,
- "regex": "",
- "skipUrlSync": false,
- "type": "datasource"
- }
- ]
- },
- "time": {
- "from": "now-3h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "browser",
- "title": "Prometheus Monitoring",
- "uid": "XmsJC9mRz",
- "version": 2
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-prometheus-dashboard
- namespace: monitoring
- apiVersion: v1
data:
prometheus-remote-write.json: |-
@@ -35226,7 +31050,7 @@ items:
"links": [
],
- "refresh": "",
+ "refresh": "60s",
"rows": [
{
"collapse": false,
@@ -35241,6 +31065,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35277,7 +31102,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n",
+ "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -35333,6 +31158,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35369,7 +31195,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n",
+ "expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -35438,6 +31264,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35474,7 +31301,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
+ "expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -35543,6 +31370,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35636,6 +31464,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35728,6 +31557,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35820,6 +31650,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -35925,6 +31756,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36017,6 +31849,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36053,7 +31886,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
+ "expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36122,6 +31955,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36214,6 +32048,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36319,6 +32154,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36355,7 +32191,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36411,6 +32247,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36447,7 +32284,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36503,6 +32340,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36539,7 +32377,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
+ "expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -36595,6 +32433,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
@@ -36691,7 +32530,7 @@ items:
"schemaVersion": 14,
"style": "dark",
"tags": [
-
+ "prometheus-mixin"
],
"templating": {
"list": [
@@ -36726,11 +32565,11 @@ items:
"includeAll": true,
"label": null,
"multi": false,
- "name": "instance",
+ "name": "cluster",
"options": [
],
- "query": "label_values(prometheus_build_info, instance)",
+ "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)",
"refresh": 2,
"regex": "",
"sort": 0,
@@ -36761,11 +32600,11 @@ items:
"includeAll": true,
"label": null,
"multi": false,
- "name": "cluster",
+ "name": "instance",
"options": [
],
- "query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)",
+ "query": "label_values(prometheus_build_info{cluster=~\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 0,
@@ -36835,11 +32674,16 @@ items:
]
},
"timezone": "browser",
- "title": "Prometheus Remote Write",
+ "title": "Prometheus / Remote Write",
"version": 0
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-prometheus-remote-write
namespace: monitoring
- apiVersion: v1
@@ -36858,7 +32702,7 @@ items:
"links": [
],
- "refresh": "10s",
+ "refresh": "60s",
"rows": [
{
"collapse": false,
@@ -36916,6 +32760,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@@ -36934,6 +32779,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@@ -36941,7 +32787,7 @@ items:
],
"type": "number",
- "unit": "short"
+ "unit": "s"
},
{
"alias": "Instance",
@@ -36952,6 +32798,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "instance",
@@ -36970,6 +32817,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "job",
@@ -36988,6 +32836,7 @@ items:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
+ "linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "version",
@@ -37041,7 +32890,7 @@ items:
"title": "Prometheus Stats",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"transform": "table",
@@ -37140,7 +32989,7 @@ items:
"title": "Target Sync",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37226,7 +33075,7 @@ items:
"title": "Targets",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37324,7 +33173,7 @@ items:
"title": "Average Scrape Interval Duration",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37393,6 +33242,14 @@ items:
"stack": true,
"steppedLine": false,
"targets": [
+ {
+ "expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded body size limit: {{job}}",
+ "legendLink": null,
+ "step": 10
+ },
{
"expr": "sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))",
"format": "time_series",
@@ -37434,7 +33291,7 @@ items:
"title": "Scrape failures",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37520,7 +33377,7 @@ items:
"title": "Appended Samples",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37618,7 +33475,7 @@ items:
"title": "Head Series",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37704,7 +33561,7 @@ items:
"title": "Head Chunks",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37802,7 +33659,7 @@ items:
"title": "Query Rate",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37888,7 +33745,7 @@ items:
"title": "Stage Duration",
"tooltip": {
"shared": true,
- "sort": 0,
+ "sort": 2,
"value_type": "individual"
},
"type": "graph",
@@ -37932,7 +33789,7 @@ items:
"schemaVersion": 14,
"style": "dark",
"tags": [
-
+ "prometheus-mixin"
],
"templating": {
"list": [
@@ -37942,7 +33799,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -37953,7 +33810,7 @@ items:
"type": "datasource"
},
{
- "allValue": null,
+ "allValue": ".+",
"current": {
"selected": true,
"text": "All",
@@ -37968,7 +33825,7 @@ items:
"options": [
],
- "query": "label_values(prometheus_build_info, job)",
+ "query": "label_values(prometheus_build_info{job=\"prometheus-k8s\",namespace=\"monitoring\"}, job)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -37981,7 +33838,7 @@ items:
"useTags": false
},
{
- "allValue": null,
+ "allValue": ".+",
"current": {
"selected": true,
"text": "All",
@@ -37996,7 +33853,7 @@ items:
"options": [
],
- "query": "label_values(prometheus_build_info, instance)",
+ "query": "label_values(prometheus_build_info{job=~\"$job\"}, instance)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -38040,12 +33897,17 @@ items:
]
},
"timezone": "utc",
- "title": "Prometheus",
+ "title": "Prometheus / Overview",
"uid": "",
"version": 0
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-prometheus
namespace: monitoring
- apiVersion: v1
@@ -38099,7 +33961,11 @@ items:
},
"id": 2,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -38138,7 +34004,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(up{job=\"kube-proxy\"})",
+ "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -38170,17 +34036,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 3,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -38206,7 +34074,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
+ "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@@ -38262,10 +34130,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -38298,7 +34168,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
+ "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -38367,17 +34237,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 5,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -38403,7 +34275,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))",
+ "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "rate",
@@ -38459,10 +34331,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 6,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -38495,7 +34369,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -38564,17 +34438,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 7,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -38600,28 +34476,28 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -38677,17 +34553,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 8,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -38713,7 +34591,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -38782,10 +34660,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 9,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -38818,7 +34698,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -38887,17 +34767,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -38923,7 +34805,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"kube-proxy\",instance=~\"$instance\"}",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -38979,17 +34861,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 11,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39015,7 +34899,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"kube-proxy\",instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -39071,17 +34955,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 12,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39107,7 +34993,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"kube-proxy\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -39177,7 +35063,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -39191,6 +35077,32 @@ items:
"allValue": null,
"current": {
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-proxy\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
},
"datasource": "$datasource",
"hide": 0,
@@ -39201,7 +35113,7 @@ items:
"options": [
],
- "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)",
+ "query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -39251,6 +35163,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-proxy
namespace: monitoring
- apiVersion: v1
@@ -39304,7 +35221,11 @@ items:
},
"id": 2,
- "interval": null,
+ "interval": "1m",
+ "legend": {
+ "alignAsTable": true,
+ "rightSide": true
+ },
"links": [
],
@@ -39343,7 +35264,7 @@ items:
"tableColumn": "",
"targets": [
{
- "expr": "sum(up{job=\"kube-scheduler\"})",
+ "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -39375,10 +35296,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 3,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -39411,31 +35334,31 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} e2e",
+ "legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
- "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} binding",
+ "legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} scheduling algorithm",
+ "legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)",
+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} volume",
+ "legendFormat": "{{cluster}} {{instance}} volume",
"refId": "D"
}
],
@@ -39488,10 +35411,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 4,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -39524,31 +35449,31 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} e2e",
+ "legendFormat": "{{cluster}} {{instance}} e2e",
"refId": "A"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} binding",
+ "legendFormat": "{{cluster}} {{instance}} binding",
"refId": "B"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} scheduling algorithm",
+ "legendFormat": "{{cluster}} {{instance}} scheduling algorithm",
"refId": "C"
},
{
- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "{{instance}} volume",
+ "legendFormat": "{{cluster}} {{instance}} volume",
"refId": "D"
}
],
@@ -39614,17 +35539,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 5,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39650,28 +35577,28 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"refId": "A"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
+ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
@@ -39727,17 +35654,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 6,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39763,7 +35692,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -39832,10 +35761,12 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 7,
+ "interval": "1m",
"legend": {
"alignAsTable": true,
"avg": false,
@@ -39868,7 +35799,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -39937,17 +35868,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 8,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -39973,7 +35906,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"kube-scheduler\", instance=~\"$instance\"}",
+ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -40029,17 +35962,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 9,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -40065,7 +36000,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "rate(process_cpu_seconds_total{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])",
+ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -40121,17 +36056,19 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
+ "fillGradient": 0,
"gridPos": {
},
"id": 10,
+ "interval": "1m",
"legend": {
- "alignAsTable": false,
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
- "rightSide": false,
+ "rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
@@ -40157,7 +36094,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"kube-scheduler\",instance=~\"$instance\"}",
+ "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -40227,7 +36164,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -40241,6 +36178,32 @@ items:
"allValue": null,
"current": {
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(up{job=\"kube-scheduler\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+
},
"datasource": "$datasource",
"hide": 0,
@@ -40251,7 +36214,7 @@ items:
"options": [
],
- "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)",
+ "query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 1,
@@ -40301,919 +36264,13 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-scheduler
namespace: monitoring
-- apiVersion: v1
- data:
- statefulset.json: |-
- {
- "__inputs": [
-
- ],
- "__requires": [
-
- ],
- "annotations": {
- "list": [
-
- ]
- },
- "editable": false,
- "gnetId": null,
- "graphTooltip": 0,
- "hideControls": false,
- "id": null,
- "links": [
-
- ],
- "refresh": "",
- "rows": [
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 2,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "cores",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 4,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "CPU",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 3,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "GB",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 4,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Memory",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 4,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "Bps",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 4,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Network",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "height": "100px",
- "panels": [
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 5,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Desired Replicas",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 6,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Replicas of current version",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 7,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Observed Generation",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "$datasource",
- "format": "none",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
-
- },
- "id": 8,
- "interval": null,
- "links": [
-
- ],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 3,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "Metadata Generation",
- "tooltip": {
- "shared": false
- },
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "0",
- "value": "null"
- }
- ],
- "valueName": "current"
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- },
- {
- "collapse": false,
- "collapsed": false,
- "panels": [
- {
- "aliasColors": {
-
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "$datasource",
- "fill": 1,
- "gridPos": {
-
- },
- "id": 9,
- "legend": {
- "alignAsTable": false,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": false,
- "show": true,
- "sideWidth": null,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [
-
- ],
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "repeat": null,
- "seriesOverrides": [
-
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "replicas specified",
- "refId": "A"
- },
- {
- "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "replicas created",
- "refId": "B"
- },
- {
- "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "ready",
- "refId": "C"
- },
- {
- "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "replicas of current version",
- "refId": "D"
- },
- {
- "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "updated",
- "refId": "E"
- }
- ],
- "thresholds": [
-
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Replicas",
- "tooltip": {
- "shared": false,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": [
-
- ]
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ]
- }
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": false,
- "title": "Dashboard Row",
- "titleSize": "h6",
- "type": "row"
- }
- ],
- "schemaVersion": 14,
- "style": "dark",
- "tags": [
- "kubernetes-mixin"
- ],
- "templating": {
- "list": [
- {
- "current": {
- "text": "default",
- "value": "default"
- },
- "hide": 0,
- "label": null,
- "name": "datasource",
- "options": [
-
- ],
- "query": "prometheus",
- "refresh": 1,
- "regex": "",
- "type": "datasource"
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": "cluster",
- "multi": false,
- "name": "cluster",
- "options": [
-
- ],
- "query": "label_values(kube_statefulset_metadata_generation, cluster)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "Namespace",
- "multi": false,
- "name": "namespace",
- "options": [
-
- ],
- "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": null,
- "current": {
-
- },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": false,
- "label": "Name",
- "multi": false,
- "name": "statefulset",
- "options": [
-
- ],
- "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)",
- "refresh": 2,
- "regex": "",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [
-
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- }
- ]
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {
- "refresh_intervals": [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ],
- "time_options": [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ]
- },
- "timezone": "UTC",
- "title": "Kubernetes / StatefulSets",
- "uid": "a31c1f46e6f727cb37c0d731a7245005",
- "version": 0
- }
- kind: ConfigMap
- metadata:
- name: grafana-dashboard-statefulset
- namespace: monitoring
- apiVersion: v1
data:
workload-total.json: |-
@@ -41276,6 +36333,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41321,7 +36379,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41378,6 +36436,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41423,7 +36482,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41491,6 +36550,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41536,7 +36596,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(avg(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41593,6 +36653,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41638,7 +36699,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(avg(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ pod }}",
@@ -41725,6 +36786,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41768,7 +36830,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -41825,6 +36887,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41868,7 +36931,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -41936,6 +36999,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -41979,7 +37043,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42036,6 +37100,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -42079,7 +37144,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42156,6 +37221,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -42199,7 +37265,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_receive_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42256,6 +37322,7 @@ items:
"dashes": false,
"datasource": "$datasource",
"fill": 2,
+ "fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
@@ -42299,7 +37366,7 @@ items:
"steppedLine": false,
"targets": [
{
- "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
+ "expr": "sort_desc(sum(irate(container_network_transmit_packets_dropped_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$interval:$resolution])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
@@ -42374,7 +37441,7 @@ items:
"value": "default"
},
"hide": 0,
- "label": null,
+ "label": "Data Source",
"name": "datasource",
"options": [
@@ -42384,6 +37451,32 @@ items:
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": {
+
+ },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "cluster",
+ "options": [
+
+ ],
+ "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [
+
+ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": ".+",
"auto": false,
@@ -42394,7 +37487,7 @@ items:
"value": "kube-system"
},
"datasource": "$datasource",
- "definition": "label_values(container_network_receive_packets_total, namespace)",
+ "definition": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)",
"hide": 0,
"includeAll": true,
"label": null,
@@ -42403,8 +37496,8 @@ items:
"options": [
],
- "query": "label_values(container_network_receive_packets_total, namespace)",
- "refresh": 1,
+ "query": "label_values(container_network_receive_packets_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\"}, namespace)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -42426,7 +37519,7 @@ items:
"value": ""
},
"datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -42435,8 +37528,8 @@ items:
"options": [
],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\"}, workload)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\"}, workload)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
@@ -42458,7 +37551,7 @@ items:
"value": "deployment"
},
"datasource": "$datasource",
- "definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
+ "definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -42467,8 +37560,8 @@ items:
"options": [
],
- "query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
- "refresh": 1,
+ "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)",
+ "refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 0,
@@ -42598,6 +37691,11 @@ items:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboard-workload-total
namespace: monitoring
kind: ConfigMapList
diff --git a/manifests/grafana-dashboardSources.yaml b/manifests/grafana-dashboardSources.yaml
index fffec98..8fc6d8e 100644
--- a/manifests/grafana-dashboardSources.yaml
+++ b/manifests/grafana-dashboardSources.yaml
@@ -6,6 +6,7 @@ data:
"providers": [
{
"folder": "Default",
+ "folderUid": "",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"
@@ -17,5 +18,10 @@ data:
}
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana-dashboards
namespace: monitoring
diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml
index ed55fa9..eca41b6 100644
--- a/manifests/grafana-deployment.yaml
+++ b/manifests/grafana-deployment.yaml
@@ -2,22 +2,35 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
+ annotations:
+ checksum/grafana-config: adbde4cde1aa3ca57c408943af53e6f7
+ checksum/grafana-dashboardproviders: d8fb24844314114bed088b83042b1bdb
+ checksum/grafana-datasources: 0800bab7ea1e2d8ad5c09586d089e033
labels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
spec:
+ automountServiceAccountToken: false
containers:
- env: []
- image: grafana/grafana:7.0.3
+ image: grafana/grafana:9.3.2
name: grafana
ports:
- containerPort: 3000
@@ -33,6 +46,12 @@ spec:
requests:
cpu: 100m
memory: 100Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
volumeMounts:
- mountPath: /var/lib/grafana
name: grafana-storage
@@ -43,6 +62,12 @@ spec:
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
+ - mountPath: /tmp
+ name: tmp-plugins
+ readOnly: false
+ - mountPath: /grafana-dashboard-definitions/0/alertmanager-overview
+ name: grafana-dashboard-alertmanager-overview
+ readOnly: false
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
@@ -52,8 +77,8 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/coredns-dashboard
- name: grafana-dashboard-coredns-dashboard
+ - mountPath: /grafana-dashboard-definitions/0/grafana-overview
+ name: grafana-dashboard-grafana-overview
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
@@ -76,9 +101,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/kubernetes-cluster-dashboard
- name: grafana-dashboard-kubernetes-cluster-dashboard
- readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
name: grafana-dashboard-namespace-by-pod
readOnly: false
@@ -91,6 +113,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
name: grafana-dashboard-node-rsrc-use
readOnly: false
+ - mountPath: /grafana-dashboard-definitions/0/nodes-darwin
+ name: grafana-dashboard-nodes-darwin
+ readOnly: false
- mountPath: /grafana-dashboard-definitions/0/nodes
name: grafana-dashboard-nodes
readOnly: false
@@ -100,9 +125,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/pod-total
name: grafana-dashboard-pod-total
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/prometheus-dashboard
- name: grafana-dashboard-prometheus-dashboard
- readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
readOnly: false
@@ -115,9 +137,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- - mountPath: /grafana-dashboard-definitions/0/statefulset
- name: grafana-dashboard-statefulset
- readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
@@ -125,8 +144,9 @@ spec:
name: grafana-config
readOnly: false
nodeSelector:
- beta.kubernetes.io/os: linux
+ kubernetes.io/os: linux
securityContext:
+ fsGroup: 65534
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: grafana
@@ -139,6 +159,12 @@ spec:
- configMap:
name: grafana-dashboards
name: grafana-dashboards
+ - emptyDir:
+ medium: Memory
+ name: tmp-plugins
+ - configMap:
+ name: grafana-dashboard-alertmanager-overview
+ name: grafana-dashboard-alertmanager-overview
- configMap:
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
@@ -149,8 +175,8 @@ spec:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
- name: grafana-dashboard-coredns-dashboard
- name: grafana-dashboard-coredns-dashboard
+ name: grafana-dashboard-grafana-overview
+ name: grafana-dashboard-grafana-overview
- configMap:
name: grafana-dashboard-k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
@@ -172,9 +198,6 @@ spec:
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- - configMap:
- name: grafana-dashboard-kubernetes-cluster-dashboard
- name: grafana-dashboard-kubernetes-cluster-dashboard
- configMap:
name: grafana-dashboard-namespace-by-pod
name: grafana-dashboard-namespace-by-pod
@@ -187,6 +210,9 @@ spec:
- configMap:
name: grafana-dashboard-node-rsrc-use
name: grafana-dashboard-node-rsrc-use
+ - configMap:
+ name: grafana-dashboard-nodes-darwin
+ name: grafana-dashboard-nodes-darwin
- configMap:
name: grafana-dashboard-nodes
name: grafana-dashboard-nodes
@@ -196,9 +222,6 @@ spec:
- configMap:
name: grafana-dashboard-pod-total
name: grafana-dashboard-pod-total
- - configMap:
- name: grafana-dashboard-prometheus-dashboard
- name: grafana-dashboard-prometheus-dashboard
- configMap:
name: grafana-dashboard-prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
@@ -211,9 +234,6 @@ spec:
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- - configMap:
- name: grafana-dashboard-statefulset
- name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total
diff --git a/manifests/grafana-networkPolicy.yaml b/manifests/grafana-networkPolicy.yaml
new file mode 100644
index 0000000..cab676c
--- /dev/null
+++ b/manifests/grafana-networkPolicy.yaml
@@ -0,0 +1,29 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ name: grafana
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 3000
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/grafana-prometheusRule.yaml b/manifests/grafana-prometheusRule.yaml
new file mode 100644
index 0000000..7ac2cfc
--- /dev/null
+++ b/manifests/grafana-prometheusRule.yaml
@@ -0,0 +1,33 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
+ prometheus: k8s
+ role: alert-rules
+ name: grafana-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: GrafanaAlerts
+ rules:
+ - alert: GrafanaRequestsFailing
+ annotations:
+ message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler }} is experiencing {{ $value | humanize }}% errors'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing
+ expr: |
+ 100 * namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query", status_code=~"5.."}
+ / ignoring (status_code)
+ sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})
+ > 50
+ for: 5m
+ labels:
+ severity: warning
+ - name: grafana_rules
+ rules:
+ - expr: |
+ sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))
+ record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m
diff --git a/manifests/grafana-service.yaml b/manifests/grafana-service.yaml
index 3acdf1e..ce95f07 100644
--- a/manifests/grafana-service.yaml
+++ b/manifests/grafana-service.yaml
@@ -2,7 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
spec:
@@ -11,4 +14,6 @@ spec:
port: 3000
targetPort: http
selector:
- app: grafana
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/grafana-serviceAccount.yaml b/manifests/grafana-serviceAccount.yaml
index 3ed3e03..2f87aca 100644
--- a/manifests/grafana-serviceAccount.yaml
+++ b/manifests/grafana-serviceAccount.yaml
@@ -1,5 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
diff --git a/manifests/grafana-serviceMonitor.yaml b/manifests/grafana-serviceMonitor.yaml
index 7ede266..f13c73c 100644
--- a/manifests/grafana-serviceMonitor.yaml
+++ b/manifests/grafana-serviceMonitor.yaml
@@ -1,6 +1,11 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
+ labels:
+ app.kubernetes.io/component: grafana
+ app.kubernetes.io/name: grafana
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 9.3.2
name: grafana
namespace: monitoring
spec:
@@ -9,4 +14,4 @@ spec:
port: http
selector:
matchLabels:
- app: grafana
+ app.kubernetes.io/name: grafana
diff --git a/manifests/kube-state-metrics-clusterRole.yaml b/manifests/kube-state-metrics-clusterRole.yaml
index 8c72322..1c1f60a 100644
--- a/manifests/kube-state-metrics-clusterRole.yaml
+++ b/manifests/kube-state-metrics-clusterRole.yaml
@@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
rules:
- apiGroups:
@@ -14,6 +16,7 @@ rules:
- nodes
- pods
- services
+ - serviceaccounts
- resourcequotas
- replicationcontrollers
- limitranges
@@ -24,16 +27,6 @@ rules:
verbs:
- list
- watch
-- apiGroups:
- - extensions
- resources:
- - daemonsets
- - deployments
- - replicasets
- - ingresses
- verbs:
- - list
- - watch
- apiGroups:
- apps
resources:
@@ -85,6 +78,13 @@ rules:
verbs:
- list
- watch
+- apiGroups:
+ - discovery.k8s.io
+ resources:
+ - endpointslices
+ verbs:
+ - list
+ - watch
- apiGroups:
- storage.k8s.io
resources:
@@ -105,6 +105,8 @@ rules:
- networking.k8s.io
resources:
- networkpolicies
+ - ingressclasses
+ - ingresses
verbs:
- list
- watch
@@ -115,3 +117,13 @@ rules:
verbs:
- list
- watch
+- apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterrolebindings
+ - clusterroles
+ - rolebindings
+ - roles
+ verbs:
+ - list
+ - watch
diff --git a/manifests/kube-state-metrics-clusterRoleBinding.yaml b/manifests/kube-state-metrics-clusterRoleBinding.yaml
index 750ff09..88c5faf 100644
--- a/manifests/kube-state-metrics-clusterRoleBinding.yaml
+++ b/manifests/kube-state-metrics-clusterRoleBinding.yaml
@@ -2,8 +2,10 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml
index 787d86e..ec95b5c 100644
--- a/manifests/kube-state-metrics-deployment.yaml
+++ b/manifests/kube-state-metrics-deployment.yaml
@@ -2,55 +2,104 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: kube-state-metrics
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
spec:
+ automountServiceAccountToken: true
containers:
- args:
- --host=127.0.0.1
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
- image: carlosedp/kube-state-metrics:v1.9.6
+ image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
name: kube-state-metrics
+ resources:
+ limits:
+ cpu: 100m
+ memory: 250Mi
+ requests:
+ cpu: 10m
+ memory: 190Mi
securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
runAsUser: 65534
- args:
- --logtostderr
- --secure-listen-address=:8443
- - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8081/
- image: carlosedp/kube-rbac-proxy:v0.5.0
+ image: quay.io/brancz/kube-rbac-proxy:v0.14.0
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
name: https-main
+ resources:
+ limits:
+ cpu: 40m
+ memory: 40Mi
+ requests:
+ cpu: 20m
+ memory: 20Mi
securityContext:
- runAsUser: 65534
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ runAsGroup: 65532
+ runAsNonRoot: true
+ runAsUser: 65532
- args:
- --logtostderr
- --secure-listen-address=:9443
- - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8082/
- image: carlosedp/kube-rbac-proxy:v0.5.0
+ image: quay.io/brancz/kube-rbac-proxy:v0.14.0
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
name: https-self
+ resources:
+ limits:
+ cpu: 20m
+ memory: 40Mi
+ requests:
+ cpu: 10m
+ memory: 20Mi
securityContext:
- runAsUser: 65534
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ runAsGroup: 65532
+ runAsNonRoot: true
+ runAsUser: 65532
nodeSelector:
kubernetes.io/os: linux
serviceAccountName: kube-state-metrics
diff --git a/manifests/kube-state-metrics-networkPolicy.yaml b/manifests/kube-state-metrics-networkPolicy.yaml
new file mode 100644
index 0000000..9815df8
--- /dev/null
+++ b/manifests/kube-state-metrics-networkPolicy.yaml
@@ -0,0 +1,31 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
+ name: kube-state-metrics
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 8443
+ protocol: TCP
+ - port: 9443
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/kube-state-metrics-prometheusRule.yaml b/manifests/kube-state-metrics-prometheusRule.yaml
new file mode 100644
index 0000000..5bfcc43
--- /dev/null
+++ b/manifests/kube-state-metrics-prometheusRule.yaml
@@ -0,0 +1,65 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
+ prometheus: k8s
+ role: alert-rules
+ name: kube-state-metrics-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: kube-state-metrics
+ rules:
+ - alert: KubeStateMetricsListErrors
+ annotations:
+ description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors
+ summary: kube-state-metrics is experiencing errors in list operations.
+ expr: |
+ (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
+ /
+ sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
+ > 0.01
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStateMetricsWatchErrors
+ annotations:
+ description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors
+ summary: kube-state-metrics is experiencing errors in watch operations.
+ expr: |
+ (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
+ /
+ sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
+ > 0.01
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStateMetricsShardingMismatch
+ annotations:
+ description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch
+ summary: kube-state-metrics sharding is misconfigured.
+ expr: |
+ stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: KubeStateMetricsShardsMissing
+ annotations:
+ description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
+ summary: kube-state-metrics shards are missing.
+ expr: |
+ 2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
+ -
+ sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
+ != 0
+ for: 15m
+ labels:
+ severity: critical
diff --git a/manifests/kube-state-metrics-service.yaml b/manifests/kube-state-metrics-service.yaml
index 7e07515..e349fe7 100644
--- a/manifests/kube-state-metrics-service.yaml
+++ b/manifests/kube-state-metrics-service.yaml
@@ -2,8 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
@@ -16,4 +18,6 @@ spec:
port: 9443
targetPort: https-self
selector:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/kube-state-metrics-serviceAccount.yaml b/manifests/kube-state-metrics-serviceAccount.yaml
index 6f856d9..be116f6 100644
--- a/manifests/kube-state-metrics-serviceAccount.yaml
+++ b/manifests/kube-state-metrics-serviceAccount.yaml
@@ -1,8 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
diff --git a/manifests/kube-state-metrics-serviceMonitor.yaml b/manifests/kube-state-metrics-serviceMonitor.yaml
index ad7a643..1dde4b0 100644
--- a/manifests/kube-state-metrics-serviceMonitor.yaml
+++ b/manifests/kube-state-metrics-serviceMonitor.yaml
@@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
- app.kubernetes.io/version: 1.9.6
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
@@ -11,6 +13,11 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
+ metricRelabelings:
+ - action: drop
+ regex: kube_endpoint_address_not_ready|kube_endpoint_address_available
+ sourceLabels:
+ - __name__
port: https-main
relabelings:
- action: labeldrop
@@ -28,4 +35,6 @@ spec:
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/node-exporter-clusterRole.yaml b/manifests/node-exporter-clusterRole.yaml
index ad783ae..e4a8b76 100644
--- a/manifests/node-exporter-clusterRole.yaml
+++ b/manifests/node-exporter-clusterRole.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
+ namespace: monitoring
rules:
- apiGroups:
- authentication.k8s.io
diff --git a/manifests/node-exporter-clusterRoleBinding.yaml b/manifests/node-exporter-clusterRoleBinding.yaml
index a5a2050..ba3594e 100644
--- a/manifests/node-exporter-clusterRoleBinding.yaml
+++ b/manifests/node-exporter-clusterRoleBinding.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
+ namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
diff --git a/manifests/node-exporter-daemonset.yaml b/manifests/node-exporter-daemonset.yaml
index afe2901..34dca7a 100644
--- a/manifests/node-exporter-daemonset.yaml
+++ b/manifests/node-exporter-daemonset.yaml
@@ -2,30 +2,41 @@ apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: node-exporter
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
spec:
+ automountServiceAccountToken: true
containers:
- args:
- --web.listen-address=127.0.0.1:9100
- - --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
+ - --path.udev.data=/host/root/run/udev/data
- --no-collector.wifi
- --no-collector.hwmon
- - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- image: prom/node-exporter:v0.18.1
+ - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
+ - --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
+ - --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
+ image: quay.io/prometheus/node-exporter:v1.5.0
name: node-exporter
resources:
limits:
@@ -34,13 +45,19 @@ spec:
requests:
cpu: 102m
memory: 180Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ add:
+ - SYS_TIME
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
volumeMounts:
- - mountPath: /host/proc
- name: proc
- readOnly: false
- mountPath: /host/sys
+ mountPropagation: HostToContainer
name: sys
- readOnly: false
+ readOnly: true
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
@@ -48,14 +65,14 @@ spec:
- args:
- --logtostderr
- --secure-listen-address=[$(IP)]:9100
- - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:9100/
env:
- name: IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- image: carlosedp/kube-rbac-proxy:v0.5.0
+ image: quay.io/brancz/kube-rbac-proxy:v0.14.0
name: kube-rbac-proxy
ports:
- containerPort: 9100
@@ -68,10 +85,20 @@ spec:
requests:
cpu: 10m
memory: 20Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
+ runAsGroup: 65532
+ runAsNonRoot: true
+ runAsUser: 65532
hostNetwork: true
hostPID: true
nodeSelector:
kubernetes.io/os: linux
+ priorityClassName: system-cluster-critical
securityContext:
runAsNonRoot: true
runAsUser: 65534
@@ -79,12 +106,13 @@ spec:
tolerations:
- operator: Exists
volumes:
- - hostPath:
- path: /proc
- name: proc
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root
+ updateStrategy:
+ rollingUpdate:
+ maxUnavailable: 10%
+ type: RollingUpdate
diff --git a/manifests/node-exporter-networkPolicy.yaml b/manifests/node-exporter-networkPolicy.yaml
new file mode 100644
index 0000000..c03fdd4
--- /dev/null
+++ b/manifests/node-exporter-networkPolicy.yaml
@@ -0,0 +1,29 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
+ name: node-exporter
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 9100
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/node-exporter-prometheusRule.yaml b/manifests/node-exporter-prometheusRule.yaml
new file mode 100644
index 0000000..5e5e52c
--- /dev/null
+++ b/manifests/node-exporter-prometheusRule.yaml
@@ -0,0 +1,316 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
+ prometheus: k8s
+ role: alert-rules
+ name: node-exporter-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: node-exporter
+ rules:
+ - alert: NodeFilesystemSpaceFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
+ summary: Filesystem is predicted to run out of space within the next 24 hours.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
+ and
+ predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemSpaceFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
+ summary: Filesystem is predicted to run out of space within the next 4 hours.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
+ and
+ predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemAlmostOutOfSpace
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
+ summary: Filesystem has less than 5% space left.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 30m
+ labels:
+ severity: warning
+ - alert: NodeFilesystemAlmostOutOfSpace
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
+ summary: Filesystem has less than 3% space left.
+ expr: |
+ (
+ node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 30m
+ labels:
+ severity: critical
+ - alert: NodeFilesystemFilesFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
+ summary: Filesystem is predicted to run out of inodes within the next 24 hours.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
+ and
+ predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemFilesFillingUp
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
+ summary: Filesystem is predicted to run out of inodes within the next 4 hours.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
+ and
+ predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeFilesystemAlmostOutOfFiles
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
+ summary: Filesystem has less than 5% inodes left.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeFilesystemAlmostOutOfFiles
+ annotations:
+ description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
+ summary: Filesystem has less than 3% inodes left.
+ expr: |
+ (
+ node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
+ and
+ node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+ )
+ for: 1h
+ labels:
+ severity: critical
+ - alert: NodeNetworkReceiveErrs
+ annotations:
+ description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
+ summary: Network interface is reporting many receive errors.
+ expr: |
+ rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeNetworkTransmitErrs
+ annotations:
+ description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
+ summary: Network interface is reporting many transmit errors.
+ expr: |
+ rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
+ for: 1h
+ labels:
+ severity: warning
+ - alert: NodeHighNumberConntrackEntriesUsed
+ annotations:
+ description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
+ summary: Number of conntrack are getting close to the limit.
+ expr: |
+ (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
+ labels:
+ severity: warning
+ - alert: NodeTextFileCollectorScrapeError
+ annotations:
+ description: Node Exporter text file collector failed to scrape.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
+ summary: Node Exporter text file collector failed to scrape.
+ expr: |
+ node_textfile_scrape_error{job="node-exporter"} == 1
+ labels:
+ severity: warning
+ - alert: NodeClockSkewDetected
+ annotations:
+ description: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
+ summary: Clock skew detected.
+ expr: |
+ (
+ node_timex_offset_seconds{job="node-exporter"} > 0.05
+ and
+ deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
+ )
+ or
+ (
+ node_timex_offset_seconds{job="node-exporter"} < -0.05
+ and
+ deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
+ )
+ for: 10m
+ labels:
+ severity: warning
+ - alert: NodeClockNotSynchronising
+ annotations:
+ description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
+ summary: Clock not synchronising.
+ expr: |
+ min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
+ and
+ node_timex_maxerror_seconds{job="node-exporter"} >= 16
+ for: 10m
+ labels:
+ severity: warning
+ - alert: NodeRAIDDegraded
+ annotations:
+ description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
+ summary: RAID Array is degraded
+ expr: |
+ node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: NodeRAIDDiskFailure
+ annotations:
+ description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
+ summary: Failed device in RAID array
+ expr: |
+ node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0
+ labels:
+ severity: warning
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: NodeFileDescriptorLimit
+ annotations:
+ description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
+ summary: Kernel is predicted to exhaust file descriptors limit soon.
+ expr: |
+ (
+ node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
+ )
+ for: 15m
+ labels:
+ severity: critical
+ - name: node-exporter.rules
+ rules:
+ - expr: |
+ count without (cpu, mode) (
+ node_cpu_seconds_total{job="node-exporter",mode="idle"}
+ )
+ record: instance:node_num_cpu:sum
+ - expr: |
+ 1 - avg without (cpu) (
+ sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m]))
+ )
+ record: instance:node_cpu_utilisation:rate5m
+ - expr: |
+ (
+ node_load1{job="node-exporter"}
+ /
+ instance:node_num_cpu:sum{job="node-exporter"}
+ )
+ record: instance:node_load1_per_cpu:ratio
+ - expr: |
+ 1 - (
+ (
+ node_memory_MemAvailable_bytes{job="node-exporter"}
+ or
+ (
+ node_memory_Buffers_bytes{job="node-exporter"}
+ +
+ node_memory_Cached_bytes{job="node-exporter"}
+ +
+ node_memory_MemFree_bytes{job="node-exporter"}
+ +
+ node_memory_Slab_bytes{job="node-exporter"}
+ )
+ )
+ /
+ node_memory_MemTotal_bytes{job="node-exporter"}
+ )
+ record: instance:node_memory_utilisation:ratio
+ - expr: |
+ rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
+ record: instance:node_vmstat_pgmajfault:rate5m
+ - expr: |
+ rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
+ record: instance_device:node_disk_io_time_seconds:rate5m
+ - expr: |
+ rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
+ record: instance_device:node_disk_io_time_weighted_seconds:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_receive_bytes_excluding_lo:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_transmit_bytes_excluding_lo:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_receive_drop_excluding_lo:rate5m
+ - expr: |
+ sum without (device) (
+ rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
+ )
+ record: instance:node_network_transmit_drop_excluding_lo:rate5m
diff --git a/manifests/node-exporter-service.yaml b/manifests/node-exporter-service.yaml
index 7dfbef6..7f3b270 100644
--- a/manifests/node-exporter-service.yaml
+++ b/manifests/node-exporter-service.yaml
@@ -2,8 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
spec:
@@ -13,4 +15,6 @@ spec:
port: 9100
targetPort: https
selector:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/node-exporter-serviceAccount.yaml b/manifests/node-exporter-serviceAccount.yaml
index 8a03ac1..b3d72ad 100644
--- a/manifests/node-exporter-serviceAccount.yaml
+++ b/manifests/node-exporter-serviceAccount.yaml
@@ -1,5 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
diff --git a/manifests/node-exporter-serviceMonitor.yaml b/manifests/node-exporter-serviceMonitor.yaml
index 357164d..00081b2 100644
--- a/manifests/node-exporter-serviceMonitor.yaml
+++ b/manifests/node-exporter-serviceMonitor.yaml
@@ -2,8 +2,10 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
- app.kubernetes.io/version: v0.18.1
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 1.5.0
name: node-exporter
namespace: monitoring
spec:
@@ -24,4 +26,6 @@ spec:
jobLabel: app.kubernetes.io/name
selector:
matchLabels:
+ app.kubernetes.io/component: exporter
app.kubernetes.io/name: node-exporter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-adapter-apiService.yaml b/manifests/prometheus-adapter-apiService.yaml
index a215efe..bf17b25 100644
--- a/manifests/prometheus-adapter-apiService.yaml
+++ b/manifests/prometheus-adapter-apiService.yaml
@@ -1,6 +1,11 @@
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
diff --git a/manifests/prometheus-adapter-clusterRole.yaml b/manifests/prometheus-adapter-clusterRole.yaml
index a02d2bb..57e4925 100644
--- a/manifests/prometheus-adapter-clusterRole.yaml
+++ b/manifests/prometheus-adapter-clusterRole.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
+ namespace: monitoring
rules:
- apiGroups:
- ""
diff --git a/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml b/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
index 9f0dbb3..b1f3175 100644
--- a/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
+++ b/manifests/prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
@@ -2,10 +2,15 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
+ namespace: monitoring
rules:
- apiGroups:
- metrics.k8s.io
diff --git a/manifests/prometheus-adapter-clusterRoleBinding.yaml b/manifests/prometheus-adapter-clusterRoleBinding.yaml
index 7e8f3da..749fc66 100644
--- a/manifests/prometheus-adapter-clusterRoleBinding.yaml
+++ b/manifests/prometheus-adapter-clusterRoleBinding.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
+ namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
diff --git a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml
index 4295b50..eb8fc9e 100644
--- a/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml
+++ b/manifests/prometheus-adapter-clusterRoleBindingDelegator.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: resource-metrics:system:auth-delegator
+ namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
diff --git a/manifests/prometheus-adapter-clusterRoleServerResources.yaml b/manifests/prometheus-adapter-clusterRoleServerResources.yaml
index fcb914c..73d78b0 100644
--- a/manifests/prometheus-adapter-clusterRoleServerResources.yaml
+++ b/manifests/prometheus-adapter-clusterRoleServerResources.yaml
@@ -1,7 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: resource-metrics-server-resources
+ namespace: monitoring
rules:
- apiGroups:
- metrics.k8s.io
diff --git a/manifests/prometheus-adapter-configMap.yaml b/manifests/prometheus-adapter-configMap.yaml
index b2bde3c..a1690bb 100644
--- a/manifests/prometheus-adapter-configMap.yaml
+++ b/manifests/prometheus-adapter-configMap.yaml
@@ -4,8 +4,26 @@ data:
"resourceRules":
"cpu":
"containerLabel": "container"
- "containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
- "nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+ "containerQuery": |
+ sum by (<<.GroupBy>>) (
+ irate (
+ container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="",pod!=""}[120s]
+ )
+ )
+ "nodeQuery": |
+ sum by (<<.GroupBy>>) (
+ 1 - irate(
+ node_cpu_seconds_total{mode="idle"}[60s]
+ )
+ * on(namespace, pod) group_left(node) (
+ node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}
+ )
+ )
+ or sum by (<<.GroupBy>>) (
+ 1 - irate(
+ windows_cpu_time_total{mode="idle", job="windows-exporter",<<.LabelMatchers>>}[4m]
+ )
+ )
"resources":
"overrides":
"namespace":
@@ -16,8 +34,21 @@ data:
"resource": "pod"
"memory":
"containerLabel": "container"
- "containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
- "nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
+ "containerQuery": |
+ sum by (<<.GroupBy>>) (
+ container_memory_working_set_bytes{<<.LabelMatchers>>,container!="",pod!=""}
+ )
+ "nodeQuery": |
+ sum by (<<.GroupBy>>) (
+ node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>}
+ -
+ node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}
+ )
+ or sum by (<<.GroupBy>>) (
+ windows_cs_physical_memory_bytes{job="windows-exporter",<<.LabelMatchers>>}
+ -
+ windows_memory_available_bytes{job="windows-exporter",<<.LabelMatchers>>}
+ )
"resources":
"overrides":
"instance":
@@ -29,5 +60,10 @@ data:
"window": "5m"
kind: ConfigMap
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: adapter-config
namespace: monitoring
diff --git a/manifests/prometheus-adapter-deployment.yaml b/manifests/prometheus-adapter-deployment.yaml
index b95f07d..a4048ac 100644
--- a/manifests/prometheus-adapter-deployment.yaml
+++ b/manifests/prometheus-adapter-deployment.yaml
@@ -1,22 +1,46 @@
apiVersion: apps/v1
kind: Deployment
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
namespace: monitoring
spec:
- replicas: 1
+ replicas: 2
selector:
matchLabels:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
strategy:
rollingUpdate:
maxSurge: 1
- maxUnavailable: 0
+ maxUnavailable: 1
template:
metadata:
labels:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
spec:
+ affinity:
+ podAntiAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - podAffinityTerm:
+ labelSelector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ namespaces:
+ - monitoring
+ topologyKey: kubernetes.io/hostname
+ weight: 100
+ automountServiceAccountToken: true
containers:
- args:
- --cert-dir=/var/run/serving-cert
@@ -25,10 +49,41 @@ spec:
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --secure-port=6443
- image: directxman12/k8s-prometheus-adapter:v0.7.0
+ - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
+ image: registry.k8s.io/prometheus-adapter/prometheus-adapter:v0.10.0
+ livenessProbe:
+ failureThreshold: 5
+ httpGet:
+ path: /livez
+ port: https
+ scheme: HTTPS
+ initialDelaySeconds: 30
+ periodSeconds: 5
name: prometheus-adapter
ports:
- containerPort: 6443
+ name: https
+ readinessProbe:
+ failureThreshold: 5
+ httpGet:
+ path: /readyz
+ port: https
+ scheme: HTTPS
+ initialDelaySeconds: 30
+ periodSeconds: 5
+ resources:
+ limits:
+ cpu: 250m
+ memory: 180Mi
+ requests:
+ cpu: 102m
+ memory: 180Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+ readOnlyRootFilesystem: true
volumeMounts:
- mountPath: /tmp
name: tmpfs
diff --git a/manifests/prometheus-adapter-networkPolicy.yaml b/manifests/prometheus-adapter-networkPolicy.yaml
new file mode 100644
index 0000000..29a11fd
--- /dev/null
+++ b/manifests/prometheus-adapter-networkPolicy.yaml
@@ -0,0 +1,23 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
+ name: prometheus-adapter
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - {}
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/prometheus-adapter-podDisruptionBudget.yaml b/manifests/prometheus-adapter-podDisruptionBudget.yaml
new file mode 100644
index 0000000..033fcaa
--- /dev/null
+++ b/manifests/prometheus-adapter-podDisruptionBudget.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
+ name: prometheus-adapter
+ namespace: monitoring
+spec:
+ minAvailable: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-adapter-roleBindingAuthReader.yaml b/manifests/prometheus-adapter-roleBindingAuthReader.yaml
index 48c8f32..5d0d903 100644
--- a/manifests/prometheus-adapter-roleBindingAuthReader.yaml
+++ b/manifests/prometheus-adapter-roleBindingAuthReader.yaml
@@ -1,6 +1,11 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: resource-metrics-auth-reader
namespace: kube-system
roleRef:
diff --git a/manifests/prometheus-adapter-service.yaml b/manifests/prometheus-adapter-service.yaml
index e786e01..c984828 100644
--- a/manifests/prometheus-adapter-service.yaml
+++ b/manifests/prometheus-adapter-service.yaml
@@ -2,7 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
namespace: monitoring
spec:
@@ -11,4 +14,6 @@ spec:
port: 443
targetPort: 6443
selector:
- name: prometheus-adapter
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-adapter-serviceAccount.yaml b/manifests/prometheus-adapter-serviceAccount.yaml
index d7e7050..bb1058b 100644
--- a/manifests/prometheus-adapter-serviceAccount.yaml
+++ b/manifests/prometheus-adapter-serviceAccount.yaml
@@ -1,5 +1,11 @@
apiVersion: v1
+automountServiceAccountToken: false
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
name: prometheus-adapter
namespace: monitoring
diff --git a/manifests/prometheus-adapter-serviceMonitor.yaml b/manifests/prometheus-adapter-serviceMonitor.yaml
new file mode 100644
index 0000000..d894145
--- /dev/null
+++ b/manifests/prometheus-adapter-serviceMonitor.yaml
@@ -0,0 +1,28 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.10.0
+ name: prometheus-adapter
+ namespace: monitoring
+spec:
+ endpoints:
+ - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+ interval: 30s
+ metricRelabelings:
+ - action: drop
+ regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*)
+ sourceLabels:
+ - __name__
+ port: https
+ scheme: https
+ tlsConfig:
+ insecureSkipVerify: true
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: metrics-adapter
+ app.kubernetes.io/name: prometheus-adapter
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-clusterRole.yaml b/manifests/prometheus-clusterRole.yaml
index d5c4598..e8e866b 100644
--- a/manifests/prometheus-clusterRole.yaml
+++ b/manifests/prometheus-clusterRole.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
rules:
- apiGroups:
diff --git a/manifests/prometheus-clusterRoleBinding.yaml b/manifests/prometheus-clusterRoleBinding.yaml
index 554bb6f..abd9d64 100644
--- a/manifests/prometheus-clusterRoleBinding.yaml
+++ b/manifests/prometheus-clusterRoleBinding.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
diff --git a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml
deleted file mode 100644
index 9506973..0000000
--- a/manifests/prometheus-kubeControllerManagerPrometheusDiscoveryService.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- labels:
- k8s-app: kube-controller-manager
- name: kube-controller-manager-prometheus-discovery
- namespace: kube-system
-spec:
- clusterIP: None
- ports:
- - name: http-metrics
- port: 10252
- targetPort: 10252
- selector:
- component: kube-controller-manager
diff --git a/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml
deleted file mode 100644
index 34e746c..0000000
--- a/manifests/prometheus-kubeDnsPrometheusDiscoveryService.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- labels:
- k8s-app: kube-dns
- name: kube-dns-prometheus-discovery
- namespace: kube-system
-spec:
- clusterIP: None
- ports:
- - name: metrics
- port: 9153
- targetPort: 9153
- selector:
- k8s-app: kube-dns
diff --git a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml b/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml
deleted file mode 100644
index b4843c7..0000000
--- a/manifests/prometheus-kubeSchedulerPrometheusDiscoveryService.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
- labels:
- k8s-app: kube-scheduler
- name: kube-scheduler-prometheus-discovery
- namespace: kube-system
-spec:
- clusterIP: None
- ports:
- - name: http-metrics
- port: 10251
- targetPort: 10251
- selector:
- component: kube-scheduler
diff --git a/manifests/prometheus-networkPolicy.yaml b/manifests/prometheus-networkPolicy.yaml
new file mode 100644
index 0000000..7fbb293
--- /dev/null
+++ b/manifests/prometheus-networkPolicy.yaml
@@ -0,0 +1,40 @@
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ name: prometheus-k8s
+ namespace: monitoring
+spec:
+ egress:
+ - {}
+ ingress:
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: prometheus
+ ports:
+ - port: 9090
+ protocol: TCP
+ - port: 8080
+ protocol: TCP
+ - from:
+ - podSelector:
+ matchLabels:
+ app.kubernetes.io/name: grafana
+ ports:
+ - port: 9090
+ protocol: TCP
+ podSelector:
+ matchLabels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ policyTypes:
+ - Egress
+ - Ingress
diff --git a/manifests/prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml
index 39e48aa..50d6cd8 100644
--- a/manifests/prometheus-operator-serviceMonitor.yaml
+++ b/manifests/prometheus-operator-serviceMonitor.yaml
@@ -4,7 +4,8 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.40.0
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.61.1
name: prometheus-operator
namespace: monitoring
spec:
@@ -19,4 +20,5 @@ spec:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.40.0
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 0.61.1
diff --git a/manifests/prometheus-podDisruptionBudget.yaml b/manifests/prometheus-podDisruptionBudget.yaml
new file mode 100644
index 0000000..7e3e656
--- /dev/null
+++ b/manifests/prometheus-podDisruptionBudget.yaml
@@ -0,0 +1,19 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ name: prometheus-k8s
+ namespace: monitoring
+spec:
+ minAvailable: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-prometheus.yaml b/manifests/prometheus-prometheus.yaml
index 2280b93..6e1d307 100644
--- a/manifests/prometheus-prometheus.yaml
+++ b/manifests/prometheus-prometheus.yaml
@@ -2,7 +2,11 @@ apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
labels:
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: k8s
namespace: monitoring
spec:
@@ -11,35 +15,45 @@ spec:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
- matchExpressions:
- - key: prometheus
- operator: In
- values:
- - k8s
+ matchLabels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
namespaces:
- monitoring
topologyKey: kubernetes.io/hostname
weight: 100
alerting:
alertmanagers:
- - name: alertmanager-main
+ - apiVersion: v2
+ name: alertmanager-main
namespace: monitoring
port: web
+ enableFeatures: []
+ externalLabels: {}
externalUrl: http://prometheus.192.168.1.15.nip.io
- image: prom/prometheus:v2.19.1
+ image: quay.io/prometheus/prometheus:v2.41.0
nodeSelector:
kubernetes.io/os: linux
+ podMetadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
+ probeNamespaceSelector: {}
+ probeSelector: {}
replicas: 1
resources:
requests:
memory: 400Mi
retention: 15d
- ruleSelector:
- matchLabels:
- prometheus: k8s
- role: alert-rules
+ ruleNamespaceSelector: {}
+ ruleSelector: {}
scrapeInterval: 30s
scrapeTimeout: 30s
securityContext:
@@ -49,4 +63,4 @@ spec:
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
- version: v2.19.1
+ version: 2.41.0
diff --git a/manifests/prometheus-prometheusRule.yaml b/manifests/prometheus-prometheusRule.yaml
new file mode 100644
index 0000000..34600e6
--- /dev/null
+++ b/manifests/prometheus-prometheusRule.yaml
@@ -0,0 +1,280 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ prometheus: k8s
+ role: alert-rules
+ name: prometheus-k8s-prometheus-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: prometheus
+ rules:
+ - alert: PrometheusBadConfig
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig
+ summary: Failed Prometheus configuration reload.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: critical
+ - alert: PrometheusNotificationQueueRunningFull
+ annotations:
+ description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull
+ summary: Prometheus alert notification queue predicted to run full in less than 30m.
+ expr: |
+ # Without min_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
+ >
+ min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
+ annotations:
+ description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers
+ summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
+ expr: |
+ (
+ rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ * 100
+ > 1
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusNotConnectedToAlertmanagers
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers
+ summary: Prometheus is not connected to any Alertmanagers.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBReloadsFailing
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
+ summary: Prometheus has issues reloading blocks from disk.
+ expr: |
+ increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusTSDBCompactionsFailing
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
+ summary: Prometheus has issues compacting blocks.
+ expr: |
+ increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
+ for: 4h
+ labels:
+ severity: warning
+ - alert: PrometheusNotIngestingSamples
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples
+ summary: Prometheus is not ingesting samples.
+ expr: |
+ (
+ rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
+ and
+ (
+ sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
+ or
+ sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
+ )
+ )
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusDuplicateTimestamps
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
+ summary: Prometheus is dropping samples with duplicate timestamps.
+ expr: |
+ rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusOutOfOrderTimestamps
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
+ summary: Prometheus drops samples with out-of-order timestamps.
+ expr: |
+ rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 10m
+ labels:
+ severity: warning
+ - alert: PrometheusRemoteStorageFailures
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures
+ summary: Prometheus fails to send samples to remote storage.
+ expr: |
+ (
+ (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
+ /
+ (
+ (rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
+ +
+ (rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]) or rate(prometheus_remote_storage_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m]))
+ )
+ )
+ * 100
+ > 1
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusRemoteWriteBehind
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind
+ summary: Prometheus remote write is behind.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
+ - ignoring(remote_name, url) group_right
+ max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ > 120
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusRemoteWriteDesiredShards
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
+ summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
+ expr: |
+ # Without max_over_time, failed scrapes could create false negatives, see
+ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+ (
+ max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
+ >
+ max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
+ )
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusRuleFailures
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
+ summary: Prometheus is failing rule evaluations.
+ expr: |
+ increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: critical
+ - alert: PrometheusMissingRuleEvaluations
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
+ summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
+ expr: |
+ increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
+ summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
+ expr: |
+ increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusLabelLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
+ summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
+ expr: |
+ increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusScrapeBodySizeLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured body_size_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit
+ summary: Prometheus has dropped some targets that exceeded body size limit.
+ expr: |
+ increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusScrapeSampleLimitHit
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed {{ printf "%.0f" $value }} scrapes in the last 5m because some targets exceeded the configured sample_limit.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit
+ summary: Prometheus has failed scrapes that have exceeded the configured sample limit.
+ expr: |
+ increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusTargetSyncFailure
+ annotations:
+ description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
+ summary: Prometheus has failed to sync targets.
+ expr: |
+ increase(prometheus_target_sync_failed_total{job="prometheus-k8s",namespace="monitoring"}[30m]) > 0
+ for: 5m
+ labels:
+ severity: critical
+ - alert: PrometheusHighQueryLoad
+ annotations:
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API has less than 20% available capacity in its query engine for the last 15 minutes.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload
+ summary: Prometheus is reaching its maximum capacity serving concurrent requests.
+ expr: |
+ avg_over_time(prometheus_engine_queries{job="prometheus-k8s",namespace="monitoring"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.8
+ for: 15m
+ labels:
+ severity: warning
+ - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+ annotations:
+ description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager
+ summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+ expr: |
+ min without (alertmanager) (
+ rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
+ /
+ rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
+ )
+ * 100
+ > 3
+ for: 15m
+ labels:
+ severity: critical
diff --git a/manifests/prometheus-roleBindingConfig.yaml b/manifests/prometheus-roleBindingConfig.yaml
index ec0129d..27685aa 100644
--- a/manifests/prometheus-roleBindingConfig.yaml
+++ b/manifests/prometheus-roleBindingConfig.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s-config
namespace: monitoring
roleRef:
diff --git a/manifests/prometheus-roleBindingSpecificNamespaces.yaml b/manifests/prometheus-roleBindingSpecificNamespaces.yaml
index c7527f6..6658f10 100644
--- a/manifests/prometheus-roleBindingSpecificNamespaces.yaml
+++ b/manifests/prometheus-roleBindingSpecificNamespaces.yaml
@@ -3,6 +3,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: default
roleRef:
@@ -16,6 +22,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: kube-system
roleRef:
@@ -29,6 +41,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
roleRef:
diff --git a/manifests/prometheus-roleConfig.yaml b/manifests/prometheus-roleConfig.yaml
index 5f1cd04..311bfbf 100644
--- a/manifests/prometheus-roleConfig.yaml
+++ b/manifests/prometheus-roleConfig.yaml
@@ -1,6 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s-config
namespace: monitoring
rules:
diff --git a/manifests/prometheus-roleSpecificNamespaces.yaml b/manifests/prometheus-roleSpecificNamespaces.yaml
index b920b88..cbdf10a 100644
--- a/manifests/prometheus-roleSpecificNamespaces.yaml
+++ b/manifests/prometheus-roleSpecificNamespaces.yaml
@@ -3,6 +3,12 @@ items:
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: default
rules:
@@ -16,9 +22,31 @@ items:
- get
- list
- watch
+ - apiGroups:
+ - extensions
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - networking.k8s.io
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: kube-system
rules:
@@ -32,9 +60,31 @@ items:
- get
- list
- watch
+ - apiGroups:
+ - extensions
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - networking.k8s.io
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
- apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
rules:
@@ -48,4 +98,20 @@ items:
- get
- list
- watch
+ - apiGroups:
+ - extensions
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - networking.k8s.io
+ resources:
+ - ingresses
+ verbs:
+ - get
+ - list
+ - watch
kind: RoleList
diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml
deleted file mode 100644
index 722a797..0000000
--- a/manifests/prometheus-rules.yaml
+++ /dev/null
@@ -1,1759 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: PrometheusRule
-metadata:
- labels:
- prometheus: k8s
- role: alert-rules
- name: prometheus-k8s-rules
- namespace: monitoring
-spec:
- groups:
- - name: node-exporter.rules
- rules:
- - expr: |
- count without (cpu) (
- count without (mode) (
- node_cpu_seconds_total{job="node-exporter"}
- )
- )
- record: instance:node_num_cpu:sum
- - expr: |
- 1 - avg without (cpu, mode) (
- rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
- )
- record: instance:node_cpu_utilisation:rate1m
- - expr: |
- (
- node_load1{job="node-exporter"}
- /
- instance:node_num_cpu:sum{job="node-exporter"}
- )
- record: instance:node_load1_per_cpu:ratio
- - expr: |
- 1 - (
- node_memory_MemAvailable_bytes{job="node-exporter"}
- /
- node_memory_MemTotal_bytes{job="node-exporter"}
- )
- record: instance:node_memory_utilisation:ratio
- - expr: |
- rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
- record: instance:node_vmstat_pgmajfault:rate1m
- - expr: |
- rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_seconds:rate1m
- - expr: |
- rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
- record: instance_device:node_disk_io_time_weighted_seconds:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_receive_bytes_excluding_lo:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_transmit_bytes_excluding_lo:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_receive_drop_excluding_lo:rate1m
- - expr: |
- sum without (device) (
- rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
- )
- record: instance:node_network_transmit_drop_excluding_lo:rate1m
- - name: kube-apiserver.rules
- rules:
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
- labels:
- verb: read
- record: apiserver_request:burnrate1d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
- labels:
- verb: read
- record: apiserver_request:burnrate1h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
- labels:
- verb: read
- record: apiserver_request:burnrate2h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
- labels:
- verb: read
- record: apiserver_request:burnrate30m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
- labels:
- verb: read
- record: apiserver_request:burnrate3d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
- labels:
- verb: read
- record: apiserver_request:burnrate5m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
- -
- (
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
- )
- )
- +
- # errors
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
- labels:
- verb: read
- record: apiserver_request:burnrate6h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
- labels:
- verb: write
- record: apiserver_request:burnrate1d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
- labels:
- verb: write
- record: apiserver_request:burnrate1h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
- labels:
- verb: write
- record: apiserver_request:burnrate2h
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
- labels:
- verb: write
- record: apiserver_request:burnrate30m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
- labels:
- verb: write
- record: apiserver_request:burnrate3d
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- labels:
- verb: write
- record: apiserver_request:burnrate5m
- - expr: |
- (
- (
- # too slow
- sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
- -
- sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
- )
- +
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
- )
- /
- sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
- labels:
- verb: write
- record: apiserver_request:burnrate6h
- - expr: |
- sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
- labels:
- verb: read
- record: code_resource:apiserver_request_total:rate5m
- - expr: |
- sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- labels:
- verb: write
- record: code_resource:apiserver_request_total:rate5m
- - expr: |
- histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
- labels:
- quantile: "0.99"
- verb: read
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
- labels:
- quantile: "0.99"
- verb: write
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
- /
- sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
- record: cluster:apiserver_request_duration_seconds:mean5m
- - expr: |
- histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- - interval: 3m
- name: kube-apiserver-availability.rules
- rules:
- - expr: |
- 1 - (
- (
- # write too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
- ) +
- (
- # read too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
- -
- (
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
- )
- ) +
- # errors
- sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
- )
- /
- sum(code:apiserver_request_total:increase30d)
- labels:
- verb: all
- record: apiserver_request:availability30d
- - expr: |
- 1 - (
- sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
- -
- (
- # too slow
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
- sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
- )
- +
- # errors
- sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
- )
- /
- sum(code:apiserver_request_total:increase30d{verb="read"})
- labels:
- verb: read
- record: apiserver_request:availability30d
- - expr: |
- 1 - (
- (
- # too slow
- sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
- sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
- )
- +
- # errors
- sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
- )
- /
- sum(code:apiserver_request_total:increase30d{verb="write"})
- labels:
- verb: write
- record: apiserver_request:availability30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
- record: code_verb:apiserver_request_total:increase30d
- - expr: |
- sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
- labels:
- verb: read
- record: code:apiserver_request_total:increase30d
- - expr: |
- sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
- labels:
- verb: write
- record: code:apiserver_request_total:increase30d
- - name: k8s.rules
- rules:
- - expr: |
- sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
- record: namespace:container_cpu_usage_seconds_total:sum_rate
- - expr: |
- sum by (cluster, namespace, pod, container) (
- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
- ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
- 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- - expr: |
- container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_working_set_bytes
- - expr: |
- container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_rss
- - expr: |
- container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_cache
- - expr: |
- container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
- max by(namespace, pod, node) (kube_pod_info{node!=""})
- )
- record: node_namespace_pod_container:container_memory_swap
- - expr: |
- sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
- record: namespace:container_memory_usage_bytes:sum
- - expr: |
- sum by (namespace) (
- sum by (namespace, pod) (
- max by (namespace, pod, container) (
- kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
- ) * on(namespace, pod) group_left() max by (namespace, pod) (
- kube_pod_status_phase{phase=~"Pending|Running"} == 1
- )
- )
- )
- record: namespace:kube_pod_container_resource_requests_memory_bytes:sum
- - expr: |
- sum by (namespace) (
- sum by (namespace, pod) (
- max by (namespace, pod, container) (
- kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
- ) * on(namespace, pod) group_left() max by (namespace, pod) (
- kube_pod_status_phase{phase=~"Pending|Running"} == 1
- )
- )
- )
- record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
- - expr: |
- max by (cluster, namespace, workload, pod) (
- label_replace(
- label_replace(
- kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
- "replicaset", "$1", "owner_name", "(.*)"
- ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
- 1, max by (replicaset, namespace, owner_name) (
- kube_replicaset_owner{job="kube-state-metrics"}
- )
- ),
- "workload", "$1", "owner_name", "(.*)"
- )
- )
- labels:
- workload_type: deployment
- record: mixin_pod_workload
- - expr: |
- max by (cluster, namespace, workload, pod) (
- label_replace(
- kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
- "workload", "$1", "owner_name", "(.*)"
- )
- )
- labels:
- workload_type: daemonset
- record: mixin_pod_workload
- - expr: |
- max by (cluster, namespace, workload, pod) (
- label_replace(
- kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
- "workload", "$1", "owner_name", "(.*)"
- )
- )
- labels:
- workload_type: statefulset
- record: mixin_pod_workload
- - name: kube-scheduler.rules
- rules:
- - expr: |
- histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.99"
- record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.9"
- record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
- labels:
- quantile: "0.5"
- record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- - name: node.rules
- rules:
- - expr: |
- sum(min(kube_pod_info{node!=""}) by (cluster, node))
- record: ':kube_pod_info_node_count:'
- - expr: |
- topk by(namespace, pod) (1,
- max by (node, namespace, pod) (
- label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)")
- ))
- record: 'node_namespace_pod:kube_pod_info:'
- - expr: |
- count by (cluster, node) (sum by (node, cpu) (
- node_cpu_seconds_total{job="node-exporter"}
- * on (namespace, pod) group_left(node)
- node_namespace_pod:kube_pod_info:
- ))
- record: node:node_num_cpu:sum
- - expr: |
- sum(
- node_memory_MemAvailable_bytes{job="node-exporter"} or
- (
- node_memory_Buffers_bytes{job="node-exporter"} +
- node_memory_Cached_bytes{job="node-exporter"} +
- node_memory_MemFree_bytes{job="node-exporter"} +
- node_memory_Slab_bytes{job="node-exporter"}
- )
- ) by (cluster)
- record: :node_memory_MemAvailable_bytes:sum
- - name: kubelet.rules
- rules:
- - expr: |
- histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- labels:
- quantile: "0.99"
- record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- labels:
- quantile: "0.9"
- record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- - expr: |
- histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
- labels:
- quantile: "0.5"
- record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- - name: kube-prometheus-node-recording.rules
- rules:
- - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
- record: instance:node_cpu:rate:sum
- - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
- record: instance:node_filesystem_usage:sum
- - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
- record: instance:node_network_receive_bytes:rate:sum
- - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
- record: instance:node_network_transmit_bytes:rate:sum
- - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
- record: instance:node_cpu:ratio
- - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
- record: cluster:node_cpu:sum_rate5m
- - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
- record: cluster:node_cpu:ratio
- - name: kube-prometheus-general.rules
- rules:
- - expr: count without(instance, pod, node) (up == 1)
- record: count:up1
- - expr: count without(instance, pod, node) (up == 0)
- record: count:up0
- - name: kube-state-metrics
- rules:
- - alert: KubeStateMetricsListErrors
- annotations:
- message: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
- expr: |
- (sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
- /
- sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
- > 0.01
- for: 15m
- labels:
- severity: critical
- - alert: KubeStateMetricsWatchErrors
- annotations:
- message: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
- expr: |
- (sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
- /
- sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
- > 0.01
- for: 15m
- labels:
- severity: critical
- - name: node-exporter
- rules:
- - alert: NodeFilesystemSpaceFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
- summary: Filesystem is predicted to run out of space within the next 24 hours.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
- and
- predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemSpaceFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
- summary: Filesystem is predicted to run out of space within the next 4 hours.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
- and
- predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeFilesystemAlmostOutOfSpace
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
- summary: Filesystem has less than 5% space left.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemAlmostOutOfSpace
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
- summary: Filesystem has less than 3% space left.
- expr: |
- (
- node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeFilesystemFilesFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
- summary: Filesystem is predicted to run out of inodes within the next 24 hours.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
- and
- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemFilesFillingUp
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
- summary: Filesystem is predicted to run out of inodes within the next 4 hours.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
- and
- predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeFilesystemAlmostOutOfFiles
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
- summary: Filesystem has less than 5% inodes left.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: warning
- - alert: NodeFilesystemAlmostOutOfFiles
- annotations:
- description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
- summary: Filesystem has less than 3% inodes left.
- expr: |
- (
- node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
- and
- node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
- )
- for: 1h
- labels:
- severity: critical
- - alert: NodeNetworkReceiveErrs
- annotations:
- description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
- summary: Network interface is reporting many receive errors.
- expr: |
- increase(node_network_receive_errs_total[2m]) > 10
- for: 1h
- labels:
- severity: warning
- - alert: NodeNetworkTransmitErrs
- annotations:
- description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
- summary: Network interface is reporting many transmit errors.
- expr: |
- increase(node_network_transmit_errs_total[2m]) > 10
- for: 1h
- labels:
- severity: warning
- - alert: NodeHighNumberConntrackEntriesUsed
- annotations:
- description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
- summary: Number of conntrack are getting close to the limit.
- expr: |
- (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
- labels:
- severity: warning
- - alert: NodeTextFileCollectorScrapeError
- annotations:
- description: Node Exporter text file collector failed to scrape.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
- summary: Node Exporter text file collector failed to scrape.
- expr: |
- node_textfile_scrape_error{job="node-exporter"} == 1
- labels:
- severity: warning
- - alert: NodeClockSkewDetected
- annotations:
- message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
- summary: Clock skew detected.
- expr: |
- (
- node_timex_offset_seconds > 0.05
- and
- deriv(node_timex_offset_seconds[5m]) >= 0
- )
- or
- (
- node_timex_offset_seconds < -0.05
- and
- deriv(node_timex_offset_seconds[5m]) <= 0
- )
- for: 10m
- labels:
- severity: warning
- - alert: NodeClockNotSynchronising
- annotations:
- message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
- summary: Clock not synchronising.
- expr: |
- min_over_time(node_timex_sync_status[5m]) == 0
- for: 10m
- labels:
- severity: warning
- - name: kubernetes-apps
- rules:
- - alert: KubePodCrashLooping
- annotations:
- message: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
- expr: |
- rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubePodNotReady
- annotations:
- message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
- expr: |
- sum by (namespace, pod) (
- max by(namespace, pod) (
- kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}
- ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (
- 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
- )
- ) > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeDeploymentGenerationMismatch
- annotations:
- message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
- expr: |
- kube_deployment_status_observed_generation{job="kube-state-metrics"}
- !=
- kube_deployment_metadata_generation{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: warning
- - alert: KubeDeploymentReplicasMismatch
- annotations:
- message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
- expr: |
- (
- kube_deployment_spec_replicas{job="kube-state-metrics"}
- !=
- kube_deployment_status_replicas_available{job="kube-state-metrics"}
- ) and (
- changes(kube_deployment_status_replicas_updated{job="kube-state-metrics"}[5m])
- ==
- 0
- )
- for: 15m
- labels:
- severity: warning
- - alert: KubeStatefulSetReplicasMismatch
- annotations:
- message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
- expr: |
- (
- kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
- !=
- kube_statefulset_status_replicas{job="kube-state-metrics"}
- ) and (
- changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
- ==
- 0
- )
- for: 15m
- labels:
- severity: warning
- - alert: KubeStatefulSetGenerationMismatch
- annotations:
- message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
- expr: |
- kube_statefulset_status_observed_generation{job="kube-state-metrics"}
- !=
- kube_statefulset_metadata_generation{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: warning
- - alert: KubeStatefulSetUpdateNotRolledOut
- annotations:
- message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
- expr: |
- (
- max without (revision) (
- kube_statefulset_status_current_revision{job="kube-state-metrics"}
- unless
- kube_statefulset_status_update_revision{job="kube-state-metrics"}
- )
- *
- (
- kube_statefulset_replicas{job="kube-state-metrics"}
- !=
- kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
- )
- ) and (
- changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
- ==
- 0
- )
- for: 15m
- labels:
- severity: warning
- - alert: KubeDaemonSetRolloutStuck
- annotations:
- message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
- expr: |
- kube_daemonset_status_number_ready{job="kube-state-metrics"}
- /
- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
- for: 15m
- labels:
- severity: warning
- - alert: KubeContainerWaiting
- annotations:
- message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
- expr: |
- sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
- for: 1h
- labels:
- severity: warning
- - alert: KubeDaemonSetNotScheduled
- annotations:
- message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
- expr: |
- kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
- -
- kube_daemonset_status_current_number_scheduled{job="kube-state-metrics"} > 0
- for: 10m
- labels:
- severity: warning
- - alert: KubeDaemonSetMisScheduled
- annotations:
- message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
- expr: |
- kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeCronJobRunning
- annotations:
- message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
- expr: |
- time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
- for: 1h
- labels:
- severity: warning
- - alert: KubeJobCompletion
- annotations:
- message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
- expr: |
- kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
- for: 1h
- labels:
- severity: warning
- - alert: KubeJobFailed
- annotations:
- message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
- expr: |
- kube_job_failed{job="kube-state-metrics"} > 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeHpaReplicasMismatch
- annotations:
- message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
- expr: |
- (kube_hpa_status_desired_replicas{job="kube-state-metrics"}
- !=
- kube_hpa_status_current_replicas{job="kube-state-metrics"})
- and
- changes(kube_hpa_status_current_replicas[15m]) == 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeHpaMaxedOut
- annotations:
- message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
- expr: |
- kube_hpa_status_current_replicas{job="kube-state-metrics"}
- ==
- kube_hpa_spec_max_replicas{job="kube-state-metrics"}
- for: 15m
- labels:
- severity: warning
- - name: kubernetes-resources
- rules:
- - alert: KubeCPUOvercommit
- annotations:
- message: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
- expr: |
- sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})
- /
- sum(kube_node_status_allocatable_cpu_cores)
- >
- (count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)
- for: 5m
- labels:
- severity: warning
- - alert: KubeMemoryOvercommit
- annotations:
- message: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
- expr: |
- sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
- /
- sum(kube_node_status_allocatable_memory_bytes)
- >
- (count(kube_node_status_allocatable_memory_bytes)-1)
- /
- count(kube_node_status_allocatable_memory_bytes)
- for: 5m
- labels:
- severity: warning
- - alert: KubeCPUQuotaOvercommit
- annotations:
- message: Cluster has overcommitted CPU resource requests for Namespaces.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit
- expr: |
- sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
- /
- sum(kube_node_status_allocatable_cpu_cores)
- > 1.5
- for: 5m
- labels:
- severity: warning
- - alert: KubeMemoryQuotaOvercommit
- annotations:
- message: Cluster has overcommitted memory resource requests for Namespaces.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit
- expr: |
- sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
- /
- sum(kube_node_status_allocatable_memory_bytes{job="node-exporter"})
- > 1.5
- for: 5m
- labels:
- severity: warning
- - alert: KubeQuotaExceeded
- annotations:
- message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
- expr: |
- kube_resourcequota{job="kube-state-metrics", type="used"}
- / ignoring(instance, job, type)
- (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
- > 0.90
- for: 15m
- labels:
- severity: warning
- - alert: CPUThrottlingHigh
- annotations:
- message: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
- expr: |
- sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
- /
- sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
- > ( 25 / 100 )
- for: 15m
- labels:
- severity: warning
- - name: kubernetes-storage
- rules:
- - alert: KubePersistentVolumeFillingUp
- annotations:
- message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
- expr: |
- kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
- < 0.03
- for: 1m
- labels:
- severity: critical
- - alert: KubePersistentVolumeFillingUp
- annotations:
- message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
- expr: |
- (
- kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
- /
- kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
- ) < 0.15
- and
- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
- for: 1h
- labels:
- severity: warning
- - alert: KubePersistentVolumeErrors
- annotations:
- message: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
- expr: |
- kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
- for: 5m
- labels:
- severity: critical
- - name: kubernetes-system
- rules:
- - alert: KubeVersionMismatch
- annotations:
- message: There are {{ $value }} different semantic versions of Kubernetes components running.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
- expr: |
- count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
- for: 15m
- labels:
- severity: warning
- - alert: KubeClientErrors
- annotations:
- message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
- expr: |
- (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
- /
- sum(rate(rest_client_requests_total[5m])) by (instance, job))
- > 0.01
- for: 15m
- labels:
- severity: warning
- - name: kube-apiserver-slos
- rules:
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
- and
- sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
- for: 2m
- labels:
- long: 1h
- severity: critical
- short: 5m
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
- and
- sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
- for: 15m
- labels:
- long: 6h
- severity: critical
- short: 30m
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
- and
- sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
- for: 1h
- labels:
- long: 1d
- severity: warning
- short: 2h
- - alert: KubeAPIErrorBudgetBurn
- annotations:
- message: The API server is burning too much error budget
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
- expr: |
- sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
- and
- sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
- for: 3h
- labels:
- long: 3d
- severity: warning
- short: 6h
- - name: kubernetes-system-apiserver
- rules:
- - alert: KubeAPILatencyHigh
- annotations:
- message: The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
- expr: |
- cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
- >
- 1
- and on (verb,resource)
- (
- cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
- >
- on (verb) group_left()
- (
- avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
- +
- 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
- )
- ) > on (verb) group_left()
- 1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
- for: 5m
- labels:
- severity: warning
- - alert: KubeAPIErrorsHigh
- annotations:
- message: API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
- expr: |
- sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
- /
- sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
- for: 10m
- labels:
- severity: warning
- - alert: KubeClientCertificateExpiration
- annotations:
- message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
- expr: |
- apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
- labels:
- severity: warning
- - alert: KubeClientCertificateExpiration
- annotations:
- message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
- expr: |
- apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
- labels:
- severity: critical
- - alert: AggregatedAPIErrors
- annotations:
- message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
- expr: |
- sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
- labels:
- severity: warning
- - alert: AggregatedAPIDown
- annotations:
- message: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
- expr: |
- sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0
- for: 5m
- labels:
- severity: warning
- - alert: KubeAPIDown
- annotations:
- message: KubeAPI has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
- expr: |
- absent(up{job="apiserver"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: kubernetes-system-kubelet
- rules:
- - alert: KubeNodeNotReady
- annotations:
- message: '{{ $labels.node }} has been unready for more than 15 minutes.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
- expr: |
- kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
- for: 15m
- labels:
- severity: warning
- - alert: KubeNodeUnreachable
- annotations:
- message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
- expr: |
- (kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1
- labels:
- severity: warning
- - alert: KubeletTooManyPods
- annotations:
- message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
- expr: |
- max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"} != 1) by(node) > 0.95
- for: 15m
- labels:
- severity: warning
- - alert: KubeNodeReadinessFlapping
- annotations:
- message: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
- expr: |
- sum(changes(kube_node_status_condition{status="true",condition="Ready"}[15m])) by (node) > 2
- for: 15m
- labels:
- severity: warning
- - alert: KubeletPlegDurationHigh
- annotations:
- message: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
- expr: |
- node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10
- for: 5m
- labels:
- severity: warning
- - alert: KubeletPodStartUpLatencyHigh
- annotations:
- message: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
- expr: |
- histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
- for: 15m
- labels:
- severity: warning
- - alert: KubeletDown
- annotations:
- message: Kubelet has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
- expr: |
- absent(up{job="kubelet", metrics_path="/metrics"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: kubernetes-system-scheduler
- rules:
- - alert: KubeSchedulerDown
- annotations:
- message: KubeScheduler has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
- expr: |
- absent(up{job="kube-scheduler"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: kubernetes-system-controller-manager
- rules:
- - alert: KubeControllerManagerDown
- annotations:
- message: KubeControllerManager has disappeared from Prometheus target discovery.
- runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
- expr: |
- absent(up{job="kube-controller-manager"} == 1)
- for: 15m
- labels:
- severity: critical
- - name: prometheus
- rules:
- - alert: PrometheusBadConfig
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
- summary: Failed Prometheus configuration reload.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
- for: 10m
- labels:
- severity: critical
- - alert: PrometheusNotificationQueueRunningFull
- annotations:
- description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
- summary: Prometheus alert notification queue predicted to run full in less than 30m.
- expr: |
- # Without min_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- (
- predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
- >
- min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- for: 15m
- labels:
- severity: warning
- - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
- annotations:
- description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
- summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
- expr: |
- (
- rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- /
- rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- * 100
- > 1
- for: 15m
- labels:
- severity: warning
- - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
- annotations:
- description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
- summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
- expr: |
- min without(alertmanager) (
- rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- /
- rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- * 100
- > 3
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusNotConnectedToAlertmanagers
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
- summary: Prometheus is not connected to any Alertmanagers.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusTSDBReloadsFailing
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
- summary: Prometheus has issues reloading blocks from disk.
- expr: |
- increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
- for: 4h
- labels:
- severity: warning
- - alert: PrometheusTSDBCompactionsFailing
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
- summary: Prometheus has issues compacting blocks.
- expr: |
- increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
- for: 4h
- labels:
- severity: warning
- - alert: PrometheusNotIngestingSamples
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
- summary: Prometheus is not ingesting samples.
- expr: |
- rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusDuplicateTimestamps
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
- summary: Prometheus is dropping samples with duplicate timestamps.
- expr: |
- rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusOutOfOrderTimestamps
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
- summary: Prometheus drops samples with out-of-order timestamps.
- expr: |
- rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusRemoteStorageFailures
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
- summary: Prometheus fails to send samples to remote storage.
- expr: |
- (
- rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- /
- (
- rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- +
- rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- )
- * 100
- > 1
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusRemoteWriteBehind
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
- summary: Prometheus remote write is behind.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- (
- max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- - on(job, instance) group_right
- max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- > 120
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusRemoteWriteDesiredShards
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
- summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
- expr: |
- # Without max_over_time, failed scrapes could create false negatives, see
- # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
- (
- max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
- >
- max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
- )
- for: 15m
- labels:
- severity: warning
- - alert: PrometheusRuleFailures
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
- summary: Prometheus is failing rule evaluations.
- expr: |
- increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 15m
- labels:
- severity: critical
- - alert: PrometheusMissingRuleEvaluations
- annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
- summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
- expr: |
- increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
- for: 15m
- labels:
- severity: warning
- - name: alertmanager.rules
- rules:
- - alert: AlertmanagerConfigInconsistent
- annotations:
- message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
- expr: |
- count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
- for: 5m
- labels:
- severity: critical
- - alert: AlertmanagerFailedReload
- annotations:
- message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.
- expr: |
- alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
- for: 10m
- labels:
- severity: warning
- - alert: AlertmanagerMembersInconsistent
- annotations:
- message: Alertmanager has not found all other members of the cluster.
- expr: |
- alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}
- != on (service) GROUP_LEFT()
- count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"})
- for: 5m
- labels:
- severity: critical
- - name: general.rules
- rules:
- - alert: TargetDown
- annotations:
- message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
- expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
- for: 10m
- labels:
- severity: warning
- - alert: Watchdog
- annotations:
- message: |
- This is an alert meant to ensure that the entire alerting pipeline is functional.
- This alert is always firing, therefore it should always be firing in Alertmanager
- and always fire against a receiver. There are integrations with various notification
- mechanisms that send a notification when this alert is not firing. For example the
- "DeadMansSnitch" integration in PagerDuty.
- expr: vector(1)
- labels:
- severity: none
- - name: node-network
- rules:
- - alert: NodeNetworkInterfaceFlapping
- annotations:
- message: Network interface "{{ $labels.device }}" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
- expr: |
- changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
- for: 2m
- labels:
- severity: warning
- - name: prometheus-operator
- rules:
- - alert: PrometheusOperatorReconcileErrors
- annotations:
- message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.
- expr: |
- rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
- for: 10m
- labels:
- severity: warning
- - alert: PrometheusOperatorNodeLookupErrors
- annotations:
- message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
- expr: |
- rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
- for: 10m
- labels:
- severity: warning
diff --git a/manifests/prometheus-service.yaml b/manifests/prometheus-service.yaml
index 4f61e88..23ea918 100644
--- a/manifests/prometheus-service.yaml
+++ b/manifests/prometheus-service.yaml
@@ -2,7 +2,11 @@ apiVersion: v1
kind: Service
metadata:
labels:
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
spec:
@@ -10,7 +14,12 @@ spec:
- name: web
port: 9090
targetPort: web
+ - name: reloader-web
+ port: 8080
+ targetPort: reloader-web
selector:
- app: prometheus
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
sessionAffinity: ClientIP
diff --git a/manifests/prometheus-serviceAccount.yaml b/manifests/prometheus-serviceAccount.yaml
index 3e55fad..2a4ada1 100644
--- a/manifests/prometheus-serviceAccount.yaml
+++ b/manifests/prometheus-serviceAccount.yaml
@@ -1,5 +1,12 @@
apiVersion: v1
+automountServiceAccountToken: true
kind: ServiceAccount
metadata:
+ labels:
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
name: prometheus-k8s
namespace: monitoring
diff --git a/manifests/prometheus-serviceMonitor.yaml b/manifests/prometheus-serviceMonitor.yaml
index b7605db..936b449 100644
--- a/manifests/prometheus-serviceMonitor.yaml
+++ b/manifests/prometheus-serviceMonitor.yaml
@@ -2,13 +2,22 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
- k8s-app: prometheus
- name: prometheus
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ app.kubernetes.io/version: 2.41.0
+ name: prometheus-k8s
namespace: monitoring
spec:
endpoints:
- interval: 30s
port: web
+ - interval: 30s
+ port: reloader-web
selector:
matchLabels:
- prometheus: k8s
+ app.kubernetes.io/component: prometheus
+ app.kubernetes.io/instance: k8s
+ app.kubernetes.io/name: prometheus
+ app.kubernetes.io/part-of: kube-prometheus
diff --git a/manifests/prometheus-serviceMonitorApiserver.yaml b/manifests/prometheus-serviceMonitorApiserver.yaml
deleted file mode 100644
index 500c0d3..0000000
--- a/manifests/prometheus-serviceMonitorApiserver.yaml
+++ /dev/null
@@ -1,74 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: apiserver
- name: kube-apiserver
- namespace: monitoring
-spec:
- endpoints:
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- interval: 30s
- metricRelabelings:
- - action: drop
- regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
- sourceLabels:
- - __name__
- - action: drop
- regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
- sourceLabels:
- - __name__
- - action: drop
- regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
- sourceLabels:
- - __name__
- - action: drop
- regex: transformation_(transformation_latencies_microseconds|failures_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|A
vailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(debugging|disk|request|server).*
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_admission_controller_admission_latencies_seconds_.*
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_admission_step_admission_latencies_seconds_.*
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
- sourceLabels:
- - __name__
- - le
- port: https
- scheme: https
- tlsConfig:
- caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- serverName: kubernetes
- jobLabel: component
- namespaceSelector:
- matchNames:
- - default
- selector:
- matchLabels:
- component: apiserver
- provider: kubernetes
diff --git a/manifests/prometheus-serviceMonitorCoreDNS.yaml b/manifests/prometheus-serviceMonitorCoreDNS.yaml
deleted file mode 100644
index 633aa18..0000000
--- a/manifests/prometheus-serviceMonitorCoreDNS.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: coredns
- name: coredns
- namespace: monitoring
-spec:
- endpoints:
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- interval: 15s
- port: metrics
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kube-dns
diff --git a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml b/manifests/prometheus-serviceMonitorKubeControllerManager.yaml
deleted file mode 100644
index 7f20fce..0000000
--- a/manifests/prometheus-serviceMonitorKubeControllerManager.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: kube-controller-manager
- name: kube-controller-manager
- namespace: monitoring
-spec:
- endpoints:
- - interval: 30s
- metricRelabelings:
- - action: drop
- regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
- sourceLabels:
- - __name__
- - action: drop
- regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
- sourceLabels:
- - __name__
- - action: drop
- regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
- sourceLabels:
- - __name__
- - action: drop
- regex: transformation_(transformation_latencies_microseconds|failures_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|A
vailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(debugging|disk|request|server).*
- sourceLabels:
- - __name__
- port: http-metrics
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kube-controller-manager
diff --git a/manifests/prometheus-serviceMonitorKubeScheduler.yaml b/manifests/prometheus-serviceMonitorKubeScheduler.yaml
deleted file mode 100644
index f00db0e..0000000
--- a/manifests/prometheus-serviceMonitorKubeScheduler.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: kube-scheduler
- name: kube-scheduler
- namespace: monitoring
-spec:
- endpoints:
- - interval: 30s
- port: http-metrics
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kube-scheduler
diff --git a/manifests/prometheus-serviceMonitorKubelet.yaml b/manifests/prometheus-serviceMonitorKubelet.yaml
deleted file mode 100644
index 6ee73fd..0000000
--- a/manifests/prometheus-serviceMonitorKubelet.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
- labels:
- k8s-app: kubelet
- name: kubelet
- namespace: monitoring
-spec:
- endpoints:
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- honorLabels: true
- interval: 30s
- metricRelabelings:
- - action: drop
- regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
- sourceLabels:
- - __name__
- - action: drop
- regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
- sourceLabels:
- - __name__
- - action: drop
- regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
- sourceLabels:
- - __name__
- - action: drop
- regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
- sourceLabels:
- - __name__
- - action: drop
- regex: transformation_(transformation_latencies_microseconds|failures_total)
- sourceLabels:
- - __name__
- - action: drop
- regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|A
vailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
- sourceLabels:
- - __name__
- port: https-metrics
- relabelings:
- - sourceLabels:
- - __metrics_path__
- targetLabel: metrics_path
- scheme: https
- tlsConfig:
- insecureSkipVerify: true
- - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- honorLabels: true
- interval: 30s
- metricRelabelings:
- - action: drop
- regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
- sourceLabels:
- - __name__
- path: /metrics/cadvisor
- port: https-metrics
- relabelings:
- - sourceLabels:
- - __metrics_path__
- targetLabel: metrics_path
- scheme: https
- tlsConfig:
- insecureSkipVerify: true
- jobLabel: k8s-app
- namespaceSelector:
- matchNames:
- - kube-system
- selector:
- matchLabels:
- k8s-app: kubelet
diff --git a/manifests/setup/0namespace-prometheusRule.yaml b/manifests/setup/0namespace-prometheusRule.yaml
new file mode 100644
index 0000000..0facf21
--- /dev/null
+++ b/manifests/setup/0namespace-prometheusRule.yaml
@@ -0,0 +1,83 @@
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ app.kubernetes.io/component: exporter
+ app.kubernetes.io/name: kube-prometheus
+ app.kubernetes.io/part-of: kube-prometheus
+ prometheus: k8s
+ role: alert-rules
+ name: kube-prometheus-rules
+ namespace: monitoring
+spec:
+ groups:
+ - name: general.rules
+ rules:
+ - alert: TargetDown
+ annotations:
+ description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
+ summary: One or more targets are unreachable.
+ expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
+ for: 10m
+ labels:
+ severity: warning
+ - alert: Watchdog
+ annotations:
+ description: |
+ This is an alert meant to ensure that the entire alerting pipeline is functional.
+ This alert is always firing, therefore it should always be firing in Alertmanager
+ and always fire against a receiver. There are integrations with various notification
+ mechanisms that send a notification when this alert is not firing. For example the
+ "DeadMansSnitch" integration in PagerDuty.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
+ summary: An alert that should always be firing to certify that Alertmanager is working properly.
+ expr: vector(1)
+ labels:
+ severity: none
+ - alert: InfoInhibitor
+ annotations:
+ description: |
+ This is an alert that is used to inhibit info alerts.
+ By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
+ other alerts.
+ This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
+ severity of 'warning' or 'critical' starts firing on the same namespace.
+ This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
+ summary: Info-level alert inhibition.
+ expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
+ labels:
+ severity: none
+ - name: node-network
+ rules:
+ - alert: NodeNetworkInterfaceFlapping
+ annotations:
+ description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping
+ summary: Network interface is often changing its status
+ expr: |
+ changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
+ for: 2m
+ labels:
+ severity: warning
+ - name: kube-prometheus-node-recording.rules
+ rules:
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)
+ record: instance:node_cpu:rate:sum
+ - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
+ record: instance:node_network_receive_bytes:rate:sum
+ - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
+ record: instance:node_network_transmit_bytes:rate:sum
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+ record: instance:node_cpu:ratio
+ - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
+ record: cluster:node_cpu:sum_rate5m
+ - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+ record: cluster:node_cpu:ratio
+ - name: kube-prometheus-general.rules
+ rules:
+ - expr: count without(instance, pod, node) (up == 1)
+ record: count:up1
+ - expr: count without(instance, pod, node) (up == 0)
+ record: count:up0
diff --git a/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml b/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml
new file mode 100644
index 0000000..ad8336d
--- /dev/null
+++ b/manifests/setup/prometheus-operator-0alertmanagerConfigCustomResourceDefinition.yaml
@@ -0,0 +1,3401 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.9.2
+ creationTimestamp: null
+ name: alertmanagerconfigs.monitoring.coreos.com
+spec:
+ group: monitoring.coreos.com
+ names:
+ categories:
+ - prometheus-operator
+ kind: AlertmanagerConfig
+ listKind: AlertmanagerConfigList
+ plural: alertmanagerconfigs
+ shortNames:
+ - amcfg
+ singular: alertmanagerconfig
+ scope: Namespaced
+ versions:
+ - name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: AlertmanagerConfig defines a namespaced AlertmanagerConfig to be aggregated across multiple namespaces configuring one Alertmanager cluster.
+ properties:
+ apiVersion:
+ description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ type: string
+ kind:
+ description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: AlertmanagerConfigSpec is a specification of the desired behavior of the Alertmanager configuration. By definition, the Alertmanager configuration only applies to alerts for which the `namespace` label is equal to the namespace of the AlertmanagerConfig resource.
+ properties:
+ inhibitRules:
+ description: List of inhibition rules. The rules will only apply to alerts matching the resource's namespace.
+ items:
+ description: InhibitRule defines an inhibition rule that allows to mute alerts when other alerts are already firing. See https://prometheus.io/docs/alerting/latest/configuration/#inhibit_rule
+ properties:
+ equal:
+ description: Labels that must have an equal value in the source and target alert for the inhibition to take effect.
+ items:
+ type: string
+ type: array
+ sourceMatch:
+ description: Matchers for which one or more alerts have to exist for the inhibition to take effect. The operator enforces that the alert matches the resource's namespace.
+ items:
+ description: Matcher defines how to match on alert's labels.
+ properties:
+ matchType:
+ description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty.
+ enum:
+ - '!='
+ - =
+ - =~
+ - '!~'
+ type: string
+ name:
+ description: Label to match.
+ minLength: 1
+ type: string
+ regex:
+ description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead.
+ type: boolean
+ value:
+ description: Label value to match.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ targetMatch:
+ description: Matchers that have to be fulfilled in the alerts to be muted. The operator enforces that the alert matches the resource's namespace.
+ items:
+ description: Matcher defines how to match on alert's labels.
+ properties:
+ matchType:
+ description: Match operation available with AlertManager >= v0.22.0 and takes precedence over Regex (deprecated) if non-empty.
+ enum:
+ - '!='
+ - =
+ - =~
+ - '!~'
+ type: string
+ name:
+ description: Label to match.
+ minLength: 1
+ type: string
+ regex:
+ description: Whether to match on equality (false) or regular-expression (true). Deprecated as of AlertManager >= v0.22.0 where a user should use MatchType instead.
+ type: boolean
+ value:
+ description: Label value to match.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: array
+ muteTimeIntervals:
+ description: List of MuteTimeInterval specifying when the routes should be muted.
+ items:
+ description: MuteTimeInterval specifies the periods in time when notifications will be muted
+ properties:
+ name:
+ description: Name of the time interval
+ type: string
+ timeIntervals:
+ description: TimeIntervals is a list of TimeInterval
+ items:
+ description: TimeInterval describes intervals of time
+ properties:
+ daysOfMonth:
+ description: DaysOfMonth is a list of DayOfMonthRange
+ items:
+ description: DayOfMonthRange is an inclusive range of days of the month beginning at 1
+ properties:
+ end:
+ description: End of the inclusive range
+ maximum: 31
+ minimum: -31
+ type: integer
+ start:
+ description: Start of the inclusive range
+ maximum: 31
+ minimum: -31
+ type: integer
+ type: object
+ type: array
+ months:
+ description: Months is a list of MonthRange
+ items:
+ description: MonthRange is an inclusive range of months of the year beginning in January Months can be specified by name (e.g 'January') by numerical month (e.g '1') or as an inclusive range (e.g 'January:March', '1:3', '1:March')
+ pattern: ^((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12])(?:((:((?i)january|february|march|april|may|june|july|august|september|october|november|december|[1-12]))$)|$)
+ type: string
+ type: array
+ times:
+ description: Times is a list of TimeRange
+ items:
+ description: TimeRange defines a start and end time in 24hr format
+ properties:
+ endTime:
+ description: EndTime is the end time in 24hr format.
+ pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$)
+ type: string
+ startTime:
+ description: StartTime is the start time in 24hr format.
+ pattern: ^((([01][0-9])|(2[0-3])):[0-5][0-9])$|(^24:00$)
+ type: string
+ type: object
+ type: array
+ weekdays:
+ description: Weekdays is a list of WeekdayRange
+ items:
+ description: WeekdayRange is an inclusive range of days of the week beginning on Sunday Days can be specified by name (e.g 'Sunday') or as an inclusive range (e.g 'Monday:Friday')
+ pattern: ^((?i)sun|mon|tues|wednes|thurs|fri|satur)day(?:((:(sun|mon|tues|wednes|thurs|fri|satur)day)$)|$)
+ type: string
+ type: array
+ years:
+ description: Years is a list of YearRange
+ items:
+ description: YearRange is an inclusive range of years
+ pattern: ^2\d{3}(?::2\d{3}|$)
+ type: string
+ type: array
+ type: object
+ type: array
+ type: object
+ type: array
+ receivers:
+ description: List of receivers.
+ items:
+ description: Receiver defines one or more notification integrations.
+ properties:
+ emailConfigs:
+ description: List of Email configurations.
+ items:
+ description: EmailConfig configures notifications via Email.
+ properties:
+ authIdentity:
+ description: The identity to use for authentication.
+ type: string
+ authPassword:
+ description: The secret's key that contains the password to use for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ authSecret:
+ description: The secret's key that contains the CRAM-MD5 secret. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ authUsername:
+ description: The username to use for authentication.
+ type: string
+ from:
+ description: The sender address.
+ type: string
+ headers:
+ description: Further headers email header key/value pairs. Overrides any headers previously set by the notification implementation.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ hello:
+ description: The hostname to identify to the SMTP server.
+ type: string
+ html:
+ description: The HTML body of the email notification.
+ type: string
+ requireTLS:
+ description: The SMTP TLS requirement. Note that Go does not support unencrypted connections to remote SMTP endpoints.
+ type: boolean
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ smarthost:
+ description: The SMTP host and port through which emails are sent. E.g. example.com:25
+ type: string
+ text:
+ description: The text body of the email notification.
+ type: string
+ tlsConfig:
+ description: TLS configuration
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ to:
+ description: The email address to send notifications to.
+ type: string
+ type: object
+ type: array
+ name:
+ description: Name of the receiver. Must be unique across all items from the list.
+ minLength: 1
+ type: string
+ opsgenieConfigs:
+ description: List of OpsGenie configurations.
+ items:
+ description: OpsGenieConfig configures notifications via OpsGenie. See https://prometheus.io/docs/alerting/latest/configuration/#opsgenie_config
+ properties:
+ actions:
+ description: Comma separated list of actions that will be available for the alert.
+ type: string
+ apiKey:
+ description: The secret's key that contains the OpsGenie API key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ apiURL:
+ description: The URL to send OpsGenie API requests to.
+ type: string
+ description:
+ description: Description of the incident.
+ type: string
+ details:
+ description: A set of arbitrary key/value pairs that provide further detail about the incident.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ entity:
+ description: Optional field that can be used to specify which domain alert is related to.
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: Alert text limited to 130 characters.
+ type: string
+ note:
+ description: Additional alert note.
+ type: string
+ priority:
+ description: Priority level of alert. Possible values are P1, P2, P3, P4, and P5.
+ type: string
+ responders:
+ description: List of responders responsible for notifications.
+ items:
+ description: OpsGenieConfigResponder defines a responder to an incident. One of `id`, `name` or `username` has to be defined.
+ properties:
+ id:
+ description: ID of the responder.
+ type: string
+ name:
+ description: Name of the responder.
+ type: string
+ type:
+ description: Type of responder.
+ enum:
+ - team
+ - teams
+ - user
+ - escalation
+ - schedule
+ minLength: 1
+ type: string
+ username:
+ description: Username of the responder.
+ type: string
+ required:
+ - type
+ type: object
+ type: array
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ source:
+ description: Backlink to the sender of the notification.
+ type: string
+ tags:
+ description: Comma separated list of tags attached to the notifications.
+ type: string
+ updateAlerts:
+ description: Whether to update message and description of the alert in OpsGenie if it already exists By default, the alert is never updated in OpsGenie, the new message only appears in activity log.
+ type: boolean
+ type: object
+ type: array
+ pagerdutyConfigs:
+ description: List of PagerDuty configurations.
+ items:
+ description: PagerDutyConfig configures notifications via PagerDuty. See https://prometheus.io/docs/alerting/latest/configuration/#pagerduty_config
+ properties:
+ class:
+ description: The class/type of the event.
+ type: string
+ client:
+ description: Client identification.
+ type: string
+ clientURL:
+ description: Backlink to the sender of notification.
+ type: string
+ component:
+ description: The part or component of the affected system that is broken.
+ type: string
+ description:
+ description: Description of the incident.
+ type: string
+ details:
+ description: Arbitrary key/value pairs that provide further detail about the incident.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ group:
+ description: A cluster or grouping of sources.
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ pagerDutyImageConfigs:
+ description: A list of image details to attach that provide further detail about an incident.
+ items:
+ description: PagerDutyImageConfig attaches images to an incident
+ properties:
+ alt:
+ description: Alt is the optional alternative text for the image.
+ type: string
+ href:
+ description: Optional URL; makes the image a clickable link.
+ type: string
+ src:
+ description: Src of the image being attached to the incident
+ type: string
+ type: object
+ type: array
+ pagerDutyLinkConfigs:
+ description: A list of link details to attach that provide further detail about an incident.
+ items:
+ description: PagerDutyLinkConfig attaches text links to an incident
+ properties:
+ alt:
+ description: Text that describes the purpose of the link, and can be used as the link's text.
+ type: string
+ href:
+ description: Href is the URL of the link to be attached
+ type: string
+ type: object
+ type: array
+ routingKey:
+ description: The secret's key that contains the PagerDuty integration key (when using Events API v2). Either this field or `serviceKey` needs to be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ serviceKey:
+ description: The secret's key that contains the PagerDuty service key (when using integration type "Prometheus"). Either this field or `routingKey` needs to be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ severity:
+ description: Severity of the incident.
+ type: string
+ url:
+ description: The URL to send requests to.
+ type: string
+ type: object
+ type: array
+ pushoverConfigs:
+ description: List of Pushover configurations.
+ items:
+ description: PushoverConfig configures notifications via Pushover. See https://prometheus.io/docs/alerting/latest/configuration/#pushover_config
+ properties:
+ expire:
+ description: How long your notification will continue to be retried for, unless the user acknowledges the notification.
+ pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$
+ type: string
+ html:
+ description: Whether notification message is HTML or plain text.
+ type: boolean
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer; Basic will cause an error.
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: Notification message.
+ type: string
+ priority:
+ description: Priority, see https://pushover.net/api#priority
+ type: string
+ retry:
+ description: How often the Pushover servers will send the same notification to the user. Must be at least 30 seconds.
+ pattern: ^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ sound:
+ description: The name of one of the sounds supported by device clients to override the user's default sound choice
+ type: string
+ title:
+ description: Notification title.
+ type: string
+ token:
+ description: The secret's key that contains the registered application's API token, see https://pushover.net/apps. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ url:
+ description: A supplementary URL shown alongside the message.
+ type: string
+ urlTitle:
+ description: A title for the supplementary URL; otherwise just the URL is shown.
+ type: string
+ userKey:
+ description: The secret's key that contains the recipient user's user key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ type: object
+ type: array
+ slackConfigs:
+ description: List of Slack configurations.
+ items:
+ description: SlackConfig configures notifications via Slack. See https://prometheus.io/docs/alerting/latest/configuration/#slack_config
+ properties:
+ actions:
+ description: A list of Slack actions that are sent with each notification.
+ items:
+ description: SlackAction configures a single Slack action that is sent with each notification. See https://api.slack.com/docs/message-attachments#action_fields and https://api.slack.com/docs/message-buttons for more information.
+ properties:
+ confirm:
+ description: SlackConfirmationField protects users from destructive actions or particularly distinguished decisions by asking them to confirm their button click one more time. See https://api.slack.com/docs/interactive-message-field-guide#confirmation_fields for more information.
+ properties:
+ dismissText:
+ type: string
+ okText:
+ type: string
+ text:
+ minLength: 1
+ type: string
+ title:
+ type: string
+ required:
+ - text
+ type: object
+ name:
+ type: string
+ style:
+ type: string
+ text:
+ minLength: 1
+ type: string
+ type:
+ minLength: 1
+ type: string
+ url:
+ type: string
+ value:
+ type: string
+ required:
+ - text
+ - type
+ type: object
+ type: array
+ apiURL:
+ description: The secret's key that contains the Slack webhook URL. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ callbackId:
+ type: string
+ channel:
+ description: The channel or user to send notifications to.
+ type: string
+ color:
+ type: string
+ fallback:
+ type: string
+ fields:
+ description: A list of Slack fields that are sent with each notification.
+ items:
+ description: SlackField configures a single Slack field that is sent with each notification. Each field must contain a title, value, and optionally, a boolean value to indicate if the field is short enough to be displayed next to other fields designated as short. See https://api.slack.com/docs/message-attachments#fields for more information.
+ properties:
+ short:
+ type: boolean
+ title:
+ minLength: 1
+ type: string
+ value:
+ minLength: 1
+ type: string
+ required:
+ - title
+ - value
+ type: object
+ type: array
+ footer:
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer; Basic will cause an error.
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ iconEmoji:
+ type: string
+ iconURL:
+ type: string
+ imageURL:
+ type: string
+ linkNames:
+ type: boolean
+ mrkdwnIn:
+ items:
+ type: string
+ type: array
+ pretext:
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ shortFields:
+ type: boolean
+ text:
+ type: string
+ thumbURL:
+ type: string
+ title:
+ type: string
+ titleLink:
+ type: string
+ username:
+ type: string
+ type: object
+ type: array
+ snsConfigs:
+ description: List of SNS configurations
+ items:
+ description: SNSConfig configures notifications via AWS SNS. See https://prometheus.io/docs/alerting/latest/configuration/#sns_configs
+ properties:
+ apiURL:
+ description: The SNS API URL i.e. https://sns.us-east-2.amazonaws.com. If not specified, the SNS API URL from the SNS SDK will be used.
+ type: string
+ attributes:
+ additionalProperties:
+ type: string
+ description: SNS message attributes.
+ type: object
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: The message content of the SNS notification.
+ type: string
+ phoneNumber:
+ description: Phone number if message is delivered via SMS in E.164 format. If you don't specify this value, you must specify a value for the TopicARN or TargetARN.
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ sigv4:
+ description: Configures AWS's Signature Version 4 signing process to sign requests.
+ properties:
+ accessKey:
+ description: AccessKey is the AWS API key. If blank, the environment variable `AWS_ACCESS_KEY_ID` is used.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ profile:
+ description: Profile is the named AWS profile used to authenticate.
+ type: string
+ region:
+ description: Region is the AWS region. If blank, the region from the default credentials chain is used.
+ type: string
+ roleArn:
+ description: RoleArn is the AWS Role ARN to assume for authentication, an alternative to using AWS API keys.
+ type: string
+ secretKey:
+ description: SecretKey is the AWS API secret. If blank, the environment variable `AWS_SECRET_ACCESS_KEY` is used.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ subject:
+ description: Subject line when the message is delivered to email endpoints.
+ type: string
+ targetARN:
+ description: The mobile platform endpoint ARN if message is delivered via mobile notifications. If you don't specify this value, you must specify a value for the TopicARN or PhoneNumber.
+ type: string
+ topicARN:
+ description: SNS topic ARN, e.g. arn:aws:sns:us-east-2:698519295917:My-Topic. If you don't specify this value, you must specify a value for the PhoneNumber or TargetARN.
+ type: string
+ type: object
+ type: array
+ telegramConfigs:
+ description: List of Telegram configurations.
+ items:
+ description: TelegramConfig configures notifications via Telegram. See https://prometheus.io/docs/alerting/latest/configuration/#telegram_config
+ properties:
+ apiURL:
+ description: The Telegram API URL i.e. https://api.telegram.org. If not specified, default API URL will be used.
+ type: string
+ botToken:
+ description: Telegram bot token. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ chatID:
+ description: The Telegram chat ID.
+ format: int64
+ type: integer
+ disableNotifications:
+ description: Disable telegram notifications
+ type: boolean
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: Message template
+ type: string
+ parseMode:
+ description: Parse mode for telegram message
+ enum:
+ - MarkdownV2
+ - Markdown
+ - HTML
+ type: string
+ sendResolved:
+ description: Whether to notify about resolved alerts.
+ type: boolean
+ type: object
+ type: array
+ victoropsConfigs:
+ description: List of VictorOps configurations.
+ items:
+ description: VictorOpsConfig configures notifications via VictorOps. See https://prometheus.io/docs/alerting/latest/configuration/#victorops_config
+ properties:
+ apiKey:
+ description: The secret's key that contains the API key to use when talking to the VictorOps API. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ apiUrl:
+ description: The VictorOps API URL.
+ type: string
+ customFields:
+ description: Additional custom fields for notification.
+ items:
+ description: KeyValue defines a (key, value) tuple.
+ properties:
+ key:
+ description: Key of the tuple.
+ minLength: 1
+ type: string
+ value:
+ description: Value of the tuple.
+ type: string
+ required:
+ - key
+ - value
+ type: object
+ type: array
+ entityDisplayName:
+ description: Contains summary of the alerted problem.
+ type: string
+ httpConfig:
+ description: The HTTP client's configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+ description: Set the authentication type. Defaults to Bearer, Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ messageType:
+ description: Describes the behavior of the alert (CRITICAL, WARNING, INFO).
+ type: string
+ monitoringTool:
+ description: The monitoring tool the state message is from.
+ type: string
+ routingKey:
+ description: A key used to map the alert to a team.
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ stateMessage:
+ description: Contains long explanation of the alerted problem.
+ type: string
+ type: object
+ type: array
+ webhookConfigs:
+ description: List of webhook configurations.
+ items:
+ description: WebhookConfig configures notifications via a generic receiver supporting the webhook payload. See https://prometheus.io/docs/alerting/latest/configuration/#webhook_config
+ properties:
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+                                  description: Set the authentication type. Defaults to Bearer; Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ maxAlerts:
+ description: Maximum number of alerts to be sent per webhook message. When 0, all alerts are included.
+ format: int32
+ minimum: 0
+ type: integer
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ url:
+ description: The URL to send HTTP POST requests to. `urlSecret` takes precedence over `url`. One of `urlSecret` and `url` should be defined.
+ type: string
+ urlSecret:
+ description: The secret's key that contains the webhook URL to send HTTP requests to. `urlSecret` takes precedence over `url`. One of `urlSecret` and `url` should be defined. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ type: object
+ type: array
+ wechatConfigs:
+ description: List of WeChat configurations.
+ items:
+ description: WeChatConfig configures notifications via WeChat. See https://prometheus.io/docs/alerting/latest/configuration/#wechat_config
+ properties:
+ agentID:
+ type: string
+ apiSecret:
+ description: The secret's key that contains the WeChat API key. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ apiURL:
+ description: The WeChat API URL.
+ type: string
+ corpID:
+ description: The corp id for authentication.
+ type: string
+ httpConfig:
+ description: HTTP client configuration.
+ properties:
+ authorization:
+ description: Authorization header configuration for the client. This is mutually exclusive with BasicAuth and is only available starting from Alertmanager v0.22+.
+ properties:
+ credentials:
+ description: The secret's key that contains the credentials of the request
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type:
+                                  description: Set the authentication type. Defaults to Bearer; Basic will cause an error
+ type: string
+ type: object
+ basicAuth:
+ description: BasicAuth for the client. This is mutually exclusive with Authorization. If both are defined, BasicAuth takes precedence.
+ properties:
+ password:
+ description: The secret in the service monitor namespace that contains the password for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ username:
+ description: The secret in the service monitor namespace that contains the username for authentication.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ bearerTokenSecret:
+ description: The secret's key that contains the bearer token to be used by the client for authentication. The secret needs to be in the same namespace as the AlertmanagerConfig object and accessible by the Prometheus Operator.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ followRedirects:
+ description: FollowRedirects specifies whether the client should follow HTTP 3xx redirects.
+ type: boolean
+ oauth2:
+ description: OAuth2 client credentials used to fetch a token for the targets.
+ properties:
+ clientId:
+ description: The secret or configmap containing the OAuth2 client id
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ clientSecret:
+ description: The secret containing the OAuth2 client secret
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ endpointParams:
+ additionalProperties:
+ type: string
+ description: Parameters to append to the token URL
+ type: object
+ scopes:
+ description: OAuth2 scopes used for the token request
+ items:
+ type: string
+ type: array
+ tokenUrl:
+ description: The URL to fetch the token from
+ minLength: 1
+ type: string
+ required:
+ - clientId
+ - clientSecret
+ - tokenUrl
+ type: object
+ proxyURL:
+ description: Optional proxy URL.
+ type: string
+ tlsConfig:
+ description: TLS configuration for the client.
+ properties:
+ ca:
+ description: Certificate authority used when verifying server certificates.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ cert:
+ description: Client certificate to present when doing client-authentication.
+ properties:
+ configMap:
+ description: ConfigMap containing data to use for the targets.
+ properties:
+ key:
+ description: The key to select.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the ConfigMap or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ secret:
+ description: Secret containing data to use for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ insecureSkipVerify:
+ description: Disable target certificate validation.
+ type: boolean
+ keySecret:
+ description: Secret containing the client key file for the targets.
+ properties:
+ key:
+ description: The key of the secret to select from. Must be a valid secret key.
+ type: string
+ name:
+ description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
+ type: string
+ optional:
+ description: Specify whether the Secret or its key must be defined
+ type: boolean
+ required:
+ - key
+ type: object
+ x-kubernetes-map-type: atomic
+ serverName:
+ description: Used to verify the hostname for the targets.
+ type: string
+ type: object
+ type: object
+ message:
+ description: API request data as defined by the WeChat API.
+ type: string
+ messageType:
+ type: string
+ sendResolved:
+ description: Whether or not to notify about resolved alerts.
+ type: boolean
+ toParty:
+ type: string
+ toTag:
+ type: string
+ toUser:
+ type: string
+ type: object
+ type: array
+ required:
+ - name
+ type: object
+ type: array
+ route:
+ description: The Alertmanager route definition for alerts matching the resource's namespace. If present, it will be added to the generated Alertmanager configuration as a first-level route.
+ properties:
+ activeTimeIntervals:
+ description: ActiveTimeIntervals is a list of MuteTimeInterval names when this route should be active.
+ items:
+ type: string
+ type: array
+ continue:
+ description: Boolean indicating whether an alert should continue matching subsequent sibling nodes. It will always be overridden to true for the first-level route by the Prometheus operator.
+ type: boolean
+ groupBy:
+ description: List of labels to group by. Labels must not be repeated (unique list). Special label "..." (aggregate by all possible labels), if provided, must be the only element in the list.
+ items:
+ type: string
+ type: array
+ groupInterval:
+                  description: 'How long to wait before sending an updated notification. Must match the regular expression `^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$`. Example: "5m"'
+ type: string
+ groupWait:
+                  description: 'How long to wait before sending the initial notification. Must match the regular expression `^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$`. Example: "30s"'
+ type: string
+ matchers:
+ description: 'List of matchers that the alert''s labels should match. For the first level route, the operator removes any existing equality and regexp matcher on the `namespace` label and adds a `namespace: