mirror of
https://github.com/carlosedp/cluster-monitoring.git
synced 2024-11-20 19:07:17 +01:00
Update libs. Re-add Kube-state-metrics override due to reverted threshold parameter on lib
This commit is contained in:
parent
de1c46dd63
commit
d4114769e2
2
Makefile
2
Makefile
@ -11,7 +11,7 @@ manifests: jsonnet
|
|||||||
rm -rf manifests
|
rm -rf manifests
|
||||||
./scripts/build.sh main.jsonnet $(JSONNET_BIN)
|
./scripts/build.sh main.jsonnet $(JSONNET_BIN)
|
||||||
|
|
||||||
update:
|
update: jsonnet_bundler
|
||||||
jb update
|
jb update
|
||||||
|
|
||||||
vendor: jsonnet_bundler jsonnetfile.json jsonnetfile.lock.json
|
vendor: jsonnet_bundler jsonnetfile.json jsonnetfile.lock.json
|
||||||
|
@ -120,6 +120,38 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||||||
|
|
||||||
grafanaDashboards+:: $._config.grafanaDashboards,
|
grafanaDashboards+:: $._config.grafanaDashboards,
|
||||||
|
|
||||||
|
kubeStateMetrics+:: {
|
||||||
|
// Override command for addon-resizer due to change from parameter --threshold to --acceptance-offset
|
||||||
|
deployment+: {
|
||||||
|
spec+: {
|
||||||
|
template+: {
|
||||||
|
spec+: {
|
||||||
|
containers:
|
||||||
|
std.map(
|
||||||
|
function(c)
|
||||||
|
if std.startsWith(c.name, 'addon-resizer') then
|
||||||
|
c {
|
||||||
|
command: [
|
||||||
|
'/pod_nanny',
|
||||||
|
'--container=kube-state-metrics',
|
||||||
|
'--cpu=100m',
|
||||||
|
'--extra-cpu=2m',
|
||||||
|
'--memory=150Mi',
|
||||||
|
'--extra-memory=30Mi',
|
||||||
|
'--acceptance-offset=5',
|
||||||
|
'--deployment=kube-state-metrics',
|
||||||
|
],
|
||||||
|
}
|
||||||
|
else
|
||||||
|
c,
|
||||||
|
super.containers,
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
// Create ingress objects per application
|
// Create ingress objects per application
|
||||||
ingress+: {
|
ingress+: {
|
||||||
local secret = k.core.v1.secret,
|
local secret = k.core.v1.secret,
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "99e5661f8d46ec2173f65b69eeb97f6e52d38e0d"
|
"version": "650359b3e627ae97a1f18cbd10d7ed9b2293c240"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ksonnet",
|
"name": "ksonnet",
|
||||||
@ -28,7 +28,7 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "9069b2c1be0ce32f63f9a01c4a4f8d69bc4e37d5"
|
"version": "19db38fc449df024446059f21d5a329babaa3927"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafonnet",
|
"name": "grafonnet",
|
||||||
@ -38,7 +38,7 @@
|
|||||||
"subdir": "grafonnet"
|
"subdir": "grafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "11022f5e920ac1ea960556193e3f0ab57d70d7c5"
|
"version": "d270f529db9eb750425a173188c534ab92532f47"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafana-builder",
|
"name": "grafana-builder",
|
||||||
@ -48,7 +48,7 @@
|
|||||||
"subdir": "grafana-builder"
|
"subdir": "grafana-builder"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "5cc4bfab6e2453266e47d01b78cbae0b2643426e"
|
"version": "e30a6040f3d7270655a980ab04d16142da4b429d"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafana",
|
"name": "grafana",
|
||||||
@ -58,7 +58,7 @@
|
|||||||
"subdir": "grafana"
|
"subdir": "grafana"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "9ddf5a198b0f7c898dc061158ea427112acbae11"
|
"version": "de2ec3f0f9115da2d47dc6b86af9b402e2bf146d"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "prometheus-operator",
|
"name": "prometheus-operator",
|
||||||
@ -78,7 +78,7 @@
|
|||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "Documentation/etcd-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "e1ca3b4434945e57e8e3a451cdbde74a903cc8e1"
|
"version": "7a5acb4a43aa06bd9e32ab59a46271ab88d497e4"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data:
|
data:
|
||||||
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gImpvYiIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJEZWFkTWFuc1N3aXRjaCIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
|
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gImpvYiIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
name: alertmanager-main
|
name: alertmanager-main
|
||||||
|
@ -12736,7 +12736,7 @@ items:
|
|||||||
],
|
],
|
||||||
"timeFrom": null,
|
"timeFrom": null,
|
||||||
"timeShift": null,
|
"timeShift": null,
|
||||||
"title": "CPU Utilizaion",
|
"title": "CPU Utilization",
|
||||||
"tooltip": {
|
"tooltip": {
|
||||||
"shared": false,
|
"shared": false,
|
||||||
"sort": 0,
|
"sort": 0,
|
||||||
@ -14361,11 +14361,25 @@ items:
|
|||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))",
|
"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{ container_name }}",
|
"legendFormat": "Current: {{ container_name }}",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum by(container) (kube_pod_container_resource_requests_cpu_cores{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Requested: {{ container }}",
|
||||||
|
"refId": "B"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum by(container) (kube_pod_container_resource_limits_cpu_cores{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Limit: {{ container }}",
|
||||||
|
"refId": "C"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": [
|
"thresholds": [
|
||||||
|
@ -37,7 +37,7 @@ spec:
|
|||||||
record: namespace_name:container_memory_usage_bytes:sum
|
record: namespace_name:container_memory_usage_bytes:sum
|
||||||
- expr: |
|
- expr: |
|
||||||
sum by (namespace, label_name) (
|
sum by (namespace, label_name) (
|
||||||
sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}) by (namespace, pod)
|
sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
|
||||||
* on (namespace, pod) group_left(label_name)
|
* on (namespace, pod) group_left(label_name)
|
||||||
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
|
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
|
||||||
)
|
)
|
||||||
@ -629,7 +629,7 @@ spec:
|
|||||||
message: Cluster has overcommitted CPU resource requests for Namespaces.
|
message: Cluster has overcommitted CPU resource requests for Namespaces.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||||
expr: |
|
expr: |
|
||||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
|
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
|
||||||
/
|
/
|
||||||
sum(node:node_num_cpu:sum)
|
sum(node:node_num_cpu:sum)
|
||||||
> 1.5
|
> 1.5
|
||||||
@ -641,7 +641,7 @@ spec:
|
|||||||
message: Cluster has overcommitted memory resource requests for Namespaces.
|
message: Cluster has overcommitted memory resource requests for Namespaces.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
|
||||||
expr: |
|
expr: |
|
||||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
|
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
|
||||||
/
|
/
|
||||||
sum(node_memory_MemTotal_bytes{job="node-exporter"})
|
sum(node_memory_MemTotal_bytes{job="node-exporter"})
|
||||||
> 1.5
|
> 1.5
|
||||||
@ -842,7 +842,7 @@ spec:
|
|||||||
in less than 7 days.
|
in less than 7 days.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||||
expr: |
|
expr: |
|
||||||
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeClientCertificateExpiration
|
- alert: KubeClientCertificateExpiration
|
||||||
@ -851,7 +851,7 @@ spec:
|
|||||||
in less than 24 hours.
|
in less than 24 hours.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||||
expr: |
|
expr: |
|
||||||
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
- name: alertmanager.rules
|
- name: alertmanager.rules
|
||||||
|
Loading…
Reference in New Issue
Block a user