mirror of
				https://github.com/carlosedp/cluster-monitoring.git
				synced 2025-10-26 10:23:04 +01:00 
			
		
		
		
	Update libs. Re-add Kube-state-metrics override due to reverted threshold parameter on lib
This commit is contained in:
		
							parent
							
								
									de1c46dd63
								
							
						
					
					
						commit
						d4114769e2
					
				
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @ -11,7 +11,7 @@ manifests: jsonnet | ||||
| 	rm -rf manifests | ||||
| 	./scripts/build.sh main.jsonnet $(JSONNET_BIN) | ||||
| 
 | ||||
| update: | ||||
| update: jsonnet_bundler | ||||
| 	jb update | ||||
| 
 | ||||
| vendor: jsonnet_bundler jsonnetfile.json jsonnetfile.lock.json | ||||
|  | ||||
| @ -120,6 +120,38 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; | ||||
| 
 | ||||
|   grafanaDashboards+:: $._config.grafanaDashboards, | ||||
| 
 | ||||
|   kubeStateMetrics+:: { | ||||
|     // Override command for addon-resizer due to change from parameter --threshold to --acceptance-offset | ||||
|     deployment+: { | ||||
|       spec+: { | ||||
|         template+: { | ||||
|           spec+: { | ||||
|             containers: | ||||
|               std.map( | ||||
|                 function(c) | ||||
|                   if std.startsWith(c.name, 'addon-resizer') then | ||||
|                     c { | ||||
|                       command: [ | ||||
|                         '/pod_nanny', | ||||
|                         '--container=kube-state-metrics', | ||||
|                         '--cpu=100m', | ||||
|                         '--extra-cpu=2m', | ||||
|                         '--memory=150Mi', | ||||
|                         '--extra-memory=30Mi', | ||||
|                         '--acceptance-offset=5', | ||||
|                         '--deployment=kube-state-metrics', | ||||
|                       ], | ||||
|                     } | ||||
|                   else | ||||
|                     c, | ||||
|                 super.containers, | ||||
|               ), | ||||
|           }, | ||||
|         }, | ||||
|       }, | ||||
|     }, | ||||
|   }, | ||||
| 
 | ||||
|   // Create ingress objects per application | ||||
|   ingress+: { | ||||
|     local secret = k.core.v1.secret, | ||||
|  | ||||
| @ -8,7 +8,7 @@ | ||||
|                     "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" | ||||
|                 } | ||||
|             }, | ||||
|             "version": "99e5661f8d46ec2173f65b69eeb97f6e52d38e0d" | ||||
|             "version": "650359b3e627ae97a1f18cbd10d7ed9b2293c240" | ||||
|         }, | ||||
|         { | ||||
|             "name": "ksonnet", | ||||
| @ -28,7 +28,7 @@ | ||||
|                     "subdir": "" | ||||
|                 } | ||||
|             }, | ||||
|             "version": "9069b2c1be0ce32f63f9a01c4a4f8d69bc4e37d5" | ||||
|             "version": "19db38fc449df024446059f21d5a329babaa3927" | ||||
|         }, | ||||
|         { | ||||
|             "name": "grafonnet", | ||||
| @ -38,7 +38,7 @@ | ||||
|                     "subdir": "grafonnet" | ||||
|                 } | ||||
|             }, | ||||
|             "version": "11022f5e920ac1ea960556193e3f0ab57d70d7c5" | ||||
|             "version": "d270f529db9eb750425a173188c534ab92532f47" | ||||
|         }, | ||||
|         { | ||||
|             "name": "grafana-builder", | ||||
| @ -48,7 +48,7 @@ | ||||
|                     "subdir": "grafana-builder" | ||||
|                 } | ||||
|             }, | ||||
|             "version": "5cc4bfab6e2453266e47d01b78cbae0b2643426e" | ||||
|             "version": "e30a6040f3d7270655a980ab04d16142da4b429d" | ||||
|         }, | ||||
|         { | ||||
|             "name": "grafana", | ||||
| @ -58,7 +58,7 @@ | ||||
|                     "subdir": "grafana" | ||||
|                 } | ||||
|             }, | ||||
|             "version": "9ddf5a198b0f7c898dc061158ea427112acbae11" | ||||
|             "version": "de2ec3f0f9115da2d47dc6b86af9b402e2bf146d" | ||||
|         }, | ||||
|         { | ||||
|             "name": "prometheus-operator", | ||||
| @ -78,7 +78,7 @@ | ||||
|                     "subdir": "Documentation/etcd-mixin" | ||||
|                 } | ||||
|             }, | ||||
|             "version": "e1ca3b4434945e57e8e3a451cdbde74a903cc8e1" | ||||
|             "version": "7a5acb4a43aa06bd9e32ab59a46271ab88d497e4" | ||||
|         } | ||||
|     ] | ||||
| } | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| apiVersion: v1 | ||||
| data: | ||||
|   alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gImpvYiIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJEZWFkTWFuc1N3aXRjaCIKICAgICJyZWNlaXZlciI6ICJudWxsIg== | ||||
|   alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gImpvYiIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg== | ||||
| kind: Secret | ||||
| metadata: | ||||
|   name: alertmanager-main | ||||
|  | ||||
| @ -12736,7 +12736,7 @@ items: | ||||
|                           ], | ||||
|                           "timeFrom": null, | ||||
|                           "timeShift": null, | ||||
|                           "title": "CPU Utilizaion", | ||||
|                           "title": "CPU Utilization", | ||||
|                           "tooltip": { | ||||
|                               "shared": false, | ||||
|                               "sort": 0, | ||||
| @ -14361,11 +14361,25 @@ items: | ||||
|                           "steppedLine": false, | ||||
|                           "targets": [ | ||||
|                               { | ||||
|                                   "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", | ||||
|                                   "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))", | ||||
|                                   "format": "time_series", | ||||
|                                   "intervalFactor": 2, | ||||
|                                   "legendFormat": "{{ container_name }}", | ||||
|                                   "legendFormat": "Current: {{ container_name }}", | ||||
|                                   "refId": "A" | ||||
|                               }, | ||||
|                               { | ||||
|                                   "expr": "sum by(container) (kube_pod_container_resource_requests_cpu_cores{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", | ||||
|                                   "format": "time_series", | ||||
|                                   "intervalFactor": 2, | ||||
|                                   "legendFormat": "Requested: {{ container }}", | ||||
|                                   "refId": "B" | ||||
|                               }, | ||||
|                               { | ||||
|                                   "expr": "sum by(container) (kube_pod_container_resource_limits_cpu_cores{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", | ||||
|                                   "format": "time_series", | ||||
|                                   "intervalFactor": 2, | ||||
|                                   "legendFormat": "Limit: {{ container }}", | ||||
|                                   "refId": "C" | ||||
|                               } | ||||
|                           ], | ||||
|                           "thresholds": [ | ||||
|  | ||||
| @ -37,7 +37,7 @@ spec: | ||||
|       record: namespace_name:container_memory_usage_bytes:sum | ||||
|     - expr: | | ||||
|         sum by (namespace, label_name) ( | ||||
|           sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}) by (namespace, pod) | ||||
|           sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod) | ||||
|         * on (namespace, pod) group_left(label_name) | ||||
|           label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") | ||||
|         ) | ||||
| @ -629,7 +629,7 @@ spec: | ||||
|         message: Cluster has overcommitted CPU resource requests for Namespaces. | ||||
|         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit | ||||
|       expr: | | ||||
|         sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"}) | ||||
|         sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) | ||||
|           / | ||||
|         sum(node:node_num_cpu:sum) | ||||
|           > 1.5 | ||||
| @ -641,7 +641,7 @@ spec: | ||||
|         message: Cluster has overcommitted memory resource requests for Namespaces. | ||||
|         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit | ||||
|       expr: | | ||||
|         sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"}) | ||||
|         sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) | ||||
|           / | ||||
|         sum(node_memory_MemTotal_bytes{job="node-exporter"}) | ||||
|           > 1.5 | ||||
| @ -842,7 +842,7 @@ spec: | ||||
|           in less than 7 days. | ||||
|         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration | ||||
|       expr: | | ||||
|         histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 | ||||
|         apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 | ||||
|       labels: | ||||
|         severity: warning | ||||
|     - alert: KubeClientCertificateExpiration | ||||
| @ -851,7 +851,7 @@ spec: | ||||
|           in less than 24 hours. | ||||
|         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration | ||||
|       expr: | | ||||
|         histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 | ||||
|         apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 | ||||
|       labels: | ||||
|         severity: critical | ||||
|   - name: alertmanager.rules | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user