From 7c31f8e99817e22bbb2bb7c7572eaa62c8ed0fe8 Mon Sep 17 00:00:00 2001 From: CarlosEDP Date: Wed, 13 Mar 2019 18:44:16 -0300 Subject: [PATCH] Updated libraries --- manifests/grafana-dashboardDefinitions.yaml | 802 +++++++++++++------- manifests/prometheus-adapter-configMap.yaml | 4 +- manifests/prometheus-rules.yaml | 42 +- 3 files changed, 572 insertions(+), 276 deletions(-) diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index a0deb6f..aeac56d 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -877,7 +877,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_dns_request_count_total{instance=\u007e\"$instance\"}[5m])) by (proto)", + "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (proto)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{proto}}", @@ -885,7 +885,7 @@ items: "step": 60 }, { - "expr": "sum(rate(coredns_dns_request_count_total{instance=\u007e\"$instance\"}[5m]))", + "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "total", @@ -993,7 +993,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_dns_request_type_count_total{instance=\u007e\"$instance\"}[5m])) by (type)", + "expr": "sum(rate(coredns_dns_request_type_count_total{instance=~\"$instance\"}[5m])) by (type)", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", @@ -1096,14 +1096,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_dns_request_count_total{instance=\u007e\"$instance\"}[5m])) by (zone)", + "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (zone)", "intervalFactor": 2, "legendFormat": "{{zone}}", "refId": "A", "step": 60 }, { - "expr": "sum(rate(coredns_dns_request_count_total{instance=\u007e\"$instance\"}[5m]))", + "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))", "intervalFactor": 2, "legendFormat": "total", "refId": "B", @@ -1206,14 +1206,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_dns_request_do_count_total{instance=\u007e\"$instance\"}[5m]))", + "expr": "sum(rate(coredns_dns_request_do_count_total{instance=~\"$instance\"}[5m]))", "intervalFactor": 2, "legendFormat": "DO", "refId": "A", "step": 40 }, { - "expr": "sum(rate(coredns_dns_request_count_total{instance=\u007e\"$instance\"}[5m]))", + "expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m]))", "intervalFactor": 2, "legendFormat": "total", "refId": "B", @@ -1324,21 +1324,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:99 ", "refId": "A", "step": 60 }, { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:90", "refId": "B", "step": 60 }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:50", "refId": "C", @@ -1449,21 +1449,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:99 ", "refId": "A", "step": 60 }, { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:90", "refId": "B", "step": 60 }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))", "intervalFactor": 2, "legendFormat": "{{proto}}:50", "refId": "C", @@ -1563,7 +1563,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_dns_response_rcode_count_total{instance=\u007e\"$instance\"}[5m])) by (rcode)", + "expr": "sum(rate(coredns_dns_response_rcode_count_total{instance=~\"$instance\"}[5m])) by (rcode)", "intervalFactor": 2, "legendFormat": "{{rcode}}", "refId": "A", @@ -1663,21 +1663,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=\u007e\"$instance\"}[5m])) by (le, job))", + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le, job))", "intervalFactor": 2, "legendFormat": "99%", "refId": "A", "step": 40 }, { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=\u007e\"$instance\"}[5m])) by (le))", + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))", "intervalFactor": 2, "legendFormat": "90%", "refId": "B", "step": 40 }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=\u007e\"$instance\"}[5m])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_request_duration_milliseconds_bucket{instance=~\"$instance\"}[5m])) by (le))", "intervalFactor": 2, "legendFormat": "50%", "refId": "C", @@ -1792,7 +1792,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", "intervalFactor": 2, "legendFormat": "{{proto}}:99%", "refId": "A", @@ -1806,7 +1806,7 @@ items: "step": 40 }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)) ", "intervalFactor": 2, "legendFormat": "{{proto}}:50%", "metric": "", @@ -1922,21 +1922,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", + "expr": "histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", "intervalFactor": 2, "legendFormat": "{{proto}}:99%", "refId": "A", "step": 40 }, { - "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", + "expr": "histogram_quantile(0.90, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)) ", "intervalFactor": 2, "legendFormat": "{{proto}}:90%", "refId": "B", "step": 40 }, { - "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=\u007e\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ", + "expr": "histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le, proto)) ", "intervalFactor": 2, "legendFormat": "{{proto}}:50%", "metric": "", @@ -2037,7 +2037,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(coredns_cache_size{instance=\u007e\"$instance\"}) by (type)", + "expr": "sum(coredns_cache_size{instance=~\"$instance\"}) by (type)", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", @@ -2140,14 +2140,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(coredns_cache_hits_total{instance=\u007e\"$instance\"}[5m])) by (type)", + "expr": "sum(rate(coredns_cache_hits_total{instance=~\"$instance\"}[5m])) by (type)", "intervalFactor": 2, "legendFormat": "hits:{{type}}", "refId": "A", "step": 40 }, { - "expr": "sum(rate(coredns_cache_misses_total{instance=\u007e\"$instance\"}[5m])) by (type)", + "expr": "sum(rate(coredns_cache_misses_total{instance=~\"$instance\"}[5m])) by (type)", "intervalFactor": 2, "legendFormat": "misses", "refId": "B", @@ -2380,7 +2380,7 @@ items: "tableColumn": "Value", "targets": [ { - "expr": "scalar(elasticsearch_cluster_health_status{color=\"green\",cluster=\u007e\"$cluster\"}) + scalar(elasticsearch_cluster_health_status{color=\"yellow\",cluster=\u007e\"$cluster\"}) * 2 + scalar(elasticsearch_cluster_health_status{color=\"red\",cluster=\u007e\"$cluster\"}) * 3", + "expr": "scalar(elasticsearch_cluster_health_status{color=\"green\",cluster=~\"$cluster\"}) + scalar(elasticsearch_cluster_health_status{color=\"yellow\",cluster=~\"$cluster\"}) * 2 + scalar(elasticsearch_cluster_health_status{color=\"red\",cluster=~\"$cluster\"}) * 3", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -2481,7 +2481,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(elasticsearch_cluster_health_number_of_nodes{cluster=\u007e\"$cluster\"})/count(elasticsearch_cluster_health_number_of_nodes{cluster=\u007e\"$cluster\"})", + "expr": "sum(elasticsearch_cluster_health_number_of_nodes{cluster=~\"$cluster\"})/count(elasticsearch_cluster_health_number_of_nodes{cluster=~\"$cluster\"})", "format": "time_series", "instant": true, "interval": "", @@ -2570,7 +2570,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_number_of_data_nodes{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_number_of_data_nodes{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -2653,7 +2653,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_number_of_pending_tasks{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_number_of_pending_tasks{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -2751,7 +2751,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_active_shards{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_active_shards{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -2835,7 +2835,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_active_primary_shards{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_active_primary_shards{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -2918,7 +2918,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_initializing_shards{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_initializing_shards{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3001,7 +3001,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_relocating_shards{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_relocating_shards{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3084,7 +3084,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_unassigned_shards{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_unassigned_shards{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3167,7 +3167,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "elasticsearch_cluster_health_delayed_unassigned_shards{cluster=\u007e\"$cluster\"}", + "expr": "elasticsearch_cluster_health_delayed_unassigned_shards{cluster=~\"$cluster\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -3251,7 +3251,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(elasticsearch_indices_docs{cluster=\u007e\"$cluster\"})", + "expr": "sum(elasticsearch_indices_docs{cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Documents", @@ -3354,7 +3354,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(elasticsearch_indices_store_size_bytes{cluster=\u007e\"$cluster\"})", + "expr": "sum(elasticsearch_indices_store_size_bytes{cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Index Size", @@ -3452,7 +3452,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (name) (rate(elasticsearch_indices_indexing_index_total{cluster=\u007e\"$cluster\"}[1h]))", + "expr": "sum by (name) (rate(elasticsearch_indices_indexing_index_total{cluster=~\"$cluster\"}[1h]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", @@ -3550,7 +3550,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (name) (rate(elasticsearch_indices_search_fetch_total{cluster=\u007e\"$cluster\"}[1h]))", + "expr": "sum by (name) (rate(elasticsearch_indices_search_fetch_total{cluster=~\"$cluster\"}[1h]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", @@ -3653,7 +3653,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(elasticsearch_thread_pool_queue_count{cluster=\u007e\"$cluster\", type!=\"management\"}) by (type)", + "expr": "sum(elasticsearch_thread_pool_queue_count{cluster=~\"$cluster\", type!=\"management\"}) by (type)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -4033,7 +4033,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(elasticsearch_thread_pool_active_count{cluster=\u007e\"$cluster\", type!=\"management\"}) by (type)", + "expr": "sum(elasticsearch_thread_pool_active_count{cluster=~\"$cluster\", type!=\"management\"}) by (type)", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -4136,7 +4136,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (name,type) (rate(elasticsearch_thread_pool_rejected_count{cluster=\u007e\"$cluster\", type!=\"management\"}[5m]))", + "expr": "sum by (name,type) (rate(elasticsearch_thread_pool_rejected_count{cluster=~\"$cluster\", type!=\"management\"}[5m]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -4249,7 +4249,7 @@ items: } ], "dsType": "elasticsearch", - "expr": "sum by (name) (elasticsearch_process_cpu_percent{cluster=\u007e\"$cluster\"})", + "expr": "sum by (name) (elasticsearch_process_cpu_percent{cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ name }}", @@ -4362,7 +4362,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (name) (avg_over_time(elasticsearch_jvm_memory_used_bytes{area=\"heap\",cluster=\u007e\"$cluster\"}[15m]) / elasticsearch_jvm_memory_max_bytes{area=\"heap\",cluster=\u007e\"$cluster\"})", + "expr": "sum by (name) (avg_over_time(elasticsearch_jvm_memory_used_bytes{area=\"heap\",cluster=~\"$cluster\"}[15m]) / elasticsearch_jvm_memory_max_bytes{area=\"heap\",cluster=~\"$cluster\"})", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -4468,7 +4468,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(elasticsearch_transport_rx_size_bytes_total{cluster=\u007e\"$cluster\"}[5m]))", + "expr": "sum(rate(elasticsearch_transport_rx_size_bytes_total{cluster=~\"$cluster\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "RX", @@ -4476,7 +4476,7 @@ items: "step": 240 }, { - "expr": "sum(rate(elasticsearch_transport_tx_size_bytes_total{cluster=\u007e\"$cluster\"}[5m])) * -1", + "expr": "sum(rate(elasticsearch_transport_tx_size_bytes_total{cluster=~\"$cluster\"}[5m])) * -1", "format": "time_series", "intervalFactor": 2, "legendFormat": "TX", @@ -4578,7 +4578,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=\u007e\"$cluster\"}[1m])", + "expr": "irate(elasticsearch_jvm_gc_collection_seconds_sum{cluster=~\"$cluster\"}[1m])", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -4820,7 +4820,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "count(kube_pod_info{pod=\u007e\"fluentd.*\"}) / count(node_boot_time_seconds)", + "expr": "count(kube_pod_info{pod=~\"fluentd.*\"}) / count(node_boot_time_seconds)", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -4884,7 +4884,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(fluentd_output_status_buffer_queue_length * on(pod) group_left(host_ip) kube_pod_info{pod=\u007e\"fluentd.*\"}) by (host_ip)", + "expr": "sum(fluentd_output_status_buffer_queue_length * on(pod) group_left(host_ip) kube_pod_info{pod=~\"fluentd.*\"}) by (host_ip)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{host_ip}}", @@ -4983,7 +4983,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(fluentd_output_status_buffer_total_bytes * on(pod) group_left(host_ip) kube_pod_info{pod=\u007e\"fluentd.*\"}) by (host_ip)", + "expr": "sum(fluentd_output_status_buffer_total_bytes * on(pod) group_left(host_ip) kube_pod_info{pod=~\"fluentd.*\"}) by (host_ip)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{host_ip}}", @@ -5082,7 +5082,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(fluentd_output_status_emit_records * on(pod) group_left(host_ip) kube_pod_info{pod=\u007e\"fluentd.*\"}) by (host_ip,plugin_id)", + "expr": "sum(fluentd_output_status_emit_records * on(pod) group_left(host_ip) kube_pod_info{pod=~\"fluentd.*\"}) by (host_ip,plugin_id)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ host_ip }} - {{ plugin_id }}", @@ -5179,7 +5179,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(fluentd_output_status_num_errors * on(pod) group_left(host_ip) kube_pod_info{pod=\u007e\"fluentd.*\"}) by (host_ip, plugin_id)", + "expr": "sum(fluentd_output_status_num_errors * on(pod) group_left(host_ip) kube_pod_info{pod=~\"fluentd.*\"}) by (host_ip, plugin_id)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{host_ip}} - {{plugin_id}}", @@ -5278,7 +5278,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(fluentd_output_status_retry_count * on(pod) group_left(host_ip) kube_pod_info{pod=\u007e\"fluentd.*\"}) by (host_ip, plugin_id)", + "expr": "sum(fluentd_output_status_retry_count * on(pod) group_left(host_ip) kube_pod_info{pod=~\"fluentd.*\"}) by (host_ip, plugin_id)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{host_ip}} - {{plugin_id}}", @@ -5444,7 +5444,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:cluster_cpu_utilisation:ratio", + "expr": "node:cluster_cpu_utilisation:ratio{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5459,7 +5459,7 @@ items: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -5530,7 +5530,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_saturation_load1: / scalar(sum(min(kube_pod_info) by (node)))", + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\"} / scalar(sum(min(kube_pod_info{cluster=\"$cluster\"}) by (node)))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5545,7 +5545,7 @@ items: "timeShift": null, "title": "CPU Saturation (Load1)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -5628,7 +5628,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:cluster_memory_utilisation:ratio", + "expr": "node:cluster_memory_utilisation:ratio{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5643,7 +5643,7 @@ items: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -5714,7 +5714,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_memory_swap_io_bytes:sum_rate", + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5729,7 +5729,7 @@ items: "timeShift": null, "title": "Memory Saturation (Swap I/O)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -5812,7 +5812,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_disk_utilisation:avg_irate / scalar(:kube_pod_info_node_count:)", + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5827,7 +5827,7 @@ items: "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -5898,7 +5898,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_disk_saturation:avg_irate / scalar(:kube_pod_info_node_count:)", + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\"} / scalar(:kube_pod_info_node_count:{cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -5913,7 +5913,7 @@ items: "timeShift": null, "title": "Disk IO Saturation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -5996,7 +5996,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_net_utilisation:sum_irate", + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -6011,7 +6011,7 @@ items: "timeShift": null, "title": "Net Utilisation (Transmitted)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6082,7 +6082,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_net_saturation:sum_irate", + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -6097,7 +6097,7 @@ items: "timeShift": null, "title": "Net Saturation (Dropped)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6180,7 +6180,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(max(node_filesystem_size_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"} - node_filesystem_avail_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:\n", + "expr": "sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"} - node_filesystem_avail_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=~\"ext[234]|btrfs|xfs|zfs\", cluster=\"$cluster\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\"}\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{node}}", @@ -6195,7 +6195,7 @@ items: "timeShift": null, "title": "Disk Capacity", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6259,6 +6259,33 @@ items: "refresh": 1, "regex": "", "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_node_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, @@ -6360,7 +6387,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_utilisation:avg1m{node=\"$node\"}", + "expr": "node:node_cpu_utilisation:avg1m{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Utilisation", @@ -6375,7 +6402,7 @@ items: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6446,7 +6473,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_cpu_saturation_load1:{node=\"$node\"}", + "expr": "node:node_cpu_saturation_load1:{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Saturation", @@ -6461,7 +6488,7 @@ items: "timeShift": null, "title": "CPU Saturation (Load1)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6544,7 +6571,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_memory_utilisation:{node=\"$node\"}", + "expr": "node:node_memory_utilisation:{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Memory", @@ -6559,7 +6586,7 @@ items: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6630,7 +6657,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_memory_swap_io_bytes:sum_rate{node=\"$node\"}", + "expr": "node:node_memory_swap_io_bytes:sum_rate{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Swap IO", @@ -6645,7 +6672,7 @@ items: "timeShift": null, "title": "Memory Saturation (Swap I/O)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6728,7 +6755,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_disk_utilisation:avg_irate{node=\"$node\"}", + "expr": "node:node_disk_utilisation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Utilisation", @@ -6743,7 +6770,7 @@ items: "timeShift": null, "title": "Disk IO Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6814,7 +6841,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_disk_saturation:avg_irate{node=\"$node\"}", + "expr": "node:node_disk_saturation:avg_irate{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Saturation", @@ -6829,7 +6856,7 @@ items: "timeShift": null, "title": "Disk IO Saturation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6912,7 +6939,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_net_utilisation:sum_irate{node=\"$node\"}", + "expr": "node:node_net_utilisation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Utilisation", @@ -6927,7 +6954,7 @@ items: "timeShift": null, "title": "Net Utilisation (Transmitted)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -6998,7 +7025,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_net_saturation:sum_irate{node=\"$node\"}", + "expr": "node:node_net_saturation:sum_irate{cluster=\"$cluster\", node=\"$node\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "Saturation", @@ -7013,7 +7040,7 @@ items: "timeShift": null, "title": "Net Saturation (Dropped)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7096,7 +7123,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_filesystem_usage:\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{node=\"$node\"}\n", + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:{cluster=\"$cluster\", node=\"$node\"}\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -7111,7 +7138,7 @@ items: "timeShift": null, "title": "Disk Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7176,6 +7203,33 @@ items: "regex": "", "type": "datasource" }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_node_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -7191,7 +7245,7 @@ items: "options": [ ], - "query": "label_values(kube_node_info, node)", + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", "refresh": 1, "regex": "", "sort": 2, @@ -7304,7 +7358,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[1m]))", + "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -7316,7 +7370,7 @@ items: "timeShift": null, "title": "CPU Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7388,7 +7442,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores) / sum(node:node_num_cpu:sum)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -7400,7 +7454,7 @@ items: "timeShift": null, "title": "CPU Requests Commitment", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7472,7 +7526,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores) / sum(node:node_num_cpu:sum)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) / sum(node:node_num_cpu:sum{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -7484,7 +7538,7 @@ items: "timeShift": null, "title": "CPU Limits Commitment", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7556,7 +7610,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum) / sum(:node_memory_MemTotal_bytes:sum)", + "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -7568,7 +7622,7 @@ items: "timeShift": null, "title": "Memory Utilisation", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7640,7 +7694,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes) / sum(:node_memory_MemTotal_bytes:sum)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -7652,7 +7706,7 @@ items: "timeShift": null, "title": "Memory Requests Commitment", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7724,7 +7778,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes) / sum(:node_memory_MemTotal_bytes:sum)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) / sum(:node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 2, @@ -7736,7 +7790,7 @@ items: "timeShift": null, "title": "Memory Limits Commitment", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -7819,7 +7873,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace)", + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -7834,7 +7888,7 @@ items: "timeShift": null, "title": "CPU Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -8022,7 +8076,7 @@ items: "decimals": 2, "link": true, "linkTooltip": "Drill down", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ @@ -8048,7 +8102,7 @@ items: ], "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace)", + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8057,7 +8111,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8066,7 +8120,7 @@ items: "step": 10 }, { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores) by (namespace)", + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8075,7 +8129,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores) by (namespace)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8084,7 +8138,7 @@ items: "step": 10 }, { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores) by (namespace)", + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8100,7 +8154,7 @@ items: "timeShift": null, "title": "CPU Quota", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -8184,7 +8238,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{container_name!=\"\"}) by (namespace)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{namespace}}", @@ -8199,7 +8253,7 @@ items: "timeShift": null, "title": "Memory Usage (w/o cache)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -8387,7 +8441,7 @@ items: "decimals": 2, "link": true, "linkTooltip": "Drill down", - "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell", + "linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell", "pattern": "namespace", "thresholds": [ @@ -8413,7 +8467,7 @@ items: ], "targets": [ { - "expr": "sum(container_memory_rss{container_name!=\"\"}) by (namespace)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8422,7 +8476,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes) by (namespace)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8431,7 +8485,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes) by (namespace)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8440,7 +8494,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes) by (namespace)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8449,7 +8503,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_rss{container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes) by (namespace)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", container_name!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\"}) by (namespace)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8465,7 +8519,7 @@ items: "timeShift": null, "title": "Requests by Namespace", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -8530,6 +8584,33 @@ items: "refresh": 1, "regex": "", "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(node_cpu_seconds_total, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, @@ -8631,7 +8712,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}) by (pod_name)", + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod_name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod_name}}", @@ -8646,7 +8727,7 @@ items: "timeShift": null, "title": "CPU Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -8834,7 +8915,7 @@ items: "decimals": 2, "link": true, "linkTooltip": "Drill down", - "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -8860,7 +8941,7 @@ items: ], "targets": [ { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8869,7 +8950,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8878,7 +8959,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8887,7 +8968,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8896,7 +8977,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -8912,7 +8993,7 @@ items: "timeShift": null, "title": "CPU Quota", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -8996,7 +9077,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_usage_bytes{namespace=\"$namespace\", container_name!=\"\"}) by (pod_name)", + "expr": "sum(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container_name!=\"\"}) by (pod_name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod_name}}", @@ -9011,7 +9092,7 @@ items: "timeShift": null, "title": "Memory Usage (w/o cache)", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -9253,7 +9334,7 @@ items: "decimals": 2, "link": true, "linkTooltip": "Drill down", - "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell", + "linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell", "pattern": "pod", "thresholds": [ @@ -9279,7 +9360,7 @@ items: ], "targets": [ { - "expr": "sum(label_replace(container_memory_usage_bytes{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9288,7 +9369,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9297,7 +9378,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_usage_bytes{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9306,7 +9387,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9315,7 +9396,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_usage_bytes{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9324,7 +9405,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_rss{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9333,7 +9414,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_cache{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9342,7 +9423,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_swap{namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", + "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\",container_name!=\"\"}, \"pod\", \"$1\", \"pod_name\", \"(.*)\")) by (pod)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9358,7 +9439,7 @@ items: "timeShift": null, "title": "Memory Quota", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -9424,6 +9505,33 @@ items: "regex": "", "type": "datasource" }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -9439,7 +9547,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, namespace)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, @@ -9551,7 +9659,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}) by (container_name)", + "expr": "sum(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", cluster=\"$cluster\"}) by (container_name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container_name}}", @@ -9566,7 +9674,7 @@ items: "timeShift": null, "title": "CPU Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -9780,7 +9888,7 @@ items: ], "targets": [ { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9789,7 +9897,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9798,7 +9906,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9807,7 +9915,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9816,7 +9924,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(label_replace(namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -9832,7 +9940,7 @@ items: "timeShift": null, "title": "CPU Quota", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -9916,7 +10024,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(container_memory_rss{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container_name}} (RSS)", @@ -9924,7 +10032,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_cache{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container_name}} (Cache)", @@ -9932,7 +10040,7 @@ items: "step": 10 }, { - "expr": "sum(container_memory_swap{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", + "expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}) by (container_name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{container_name}} (Swap)", @@ -9947,7 +10055,7 @@ items: "timeShift": null, "title": "Memory Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -10215,7 +10323,7 @@ items: ], "targets": [ { - "expr": "sum(label_replace(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"POD\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10224,7 +10332,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10233,7 +10341,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10242,7 +10350,7 @@ items: "step": 10 }, { - "expr": "sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10251,7 +10359,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "expr": "sum(label_replace(container_memory_usage_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name!=\"\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10260,7 +10368,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_rss{namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(label_replace(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10269,7 +10377,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_cache{namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(label_replace(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10278,7 +10386,7 @@ items: "step": 10 }, { - "expr": "sum(label_replace(container_memory_swap{namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", + "expr": "sum(label_replace(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name != \"\", container_name != \"POD\"}, \"container\", \"$1\", \"container_name\", \"(.*)\")) by (container)", "format": "table", "instant": true, "intervalFactor": 2, @@ -10294,7 +10402,7 @@ items: "timeShift": null, "title": "Memory Quota", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -10360,6 +10468,33 @@ items: "regex": "", "type": "datasource" }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -10375,7 +10510,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, namespace)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, @@ -10402,7 +10537,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", "refresh": 1, "regex": "", "sort": 2, @@ -11320,7 +11455,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "(sum(node_filesystem_size_bytes{device=\u007e\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=\u007e\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=\u007e\"/dev/.*\"}) * 100", + "expr": "(sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) - sum(node_filesystem_free_bytes{device=~\"/dev/.*\"}) ) / sum(node_filesystem_size_bytes{device=~\"/dev/.*\"}) * 100", "format": "time_series", "interval": "10s", "intervalFactor": 1, @@ -11837,7 +11972,7 @@ items: "step": 10 }, { - "expr": "topk(10,sum(container_memory_rss{name=\u007e\".+\"}) by (container_name))", + "expr": "topk(10,sum(container_memory_rss{name=~\".+\"}) by (container_name))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ container_name }}", @@ -11947,7 +12082,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod_name=\u007e\".+\"}[5m])) by (pod_name))", + "expr": "topk(10,sum(rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[5m])) by (pod_name))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ pod_name }}", @@ -12068,7 +12203,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod_name=\u007e\".+\"}[5m])) by (pod_name))", + "expr": "topk(10,sum(rate(container_network_receive_bytes_total{pod_name=~\".+\"}[5m])) by (pod_name))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{pod_name}}", @@ -12076,7 +12211,7 @@ items: "step": 240 }, { - "expr": "- rate(container_network_transmit_bytes_total{pod_name=\u007e\".+\"}[$interval])", + "expr": "- rate(container_network_transmit_bytes_total{pod_name=~\".+\"}[$interval])", "format": "time_series", "hide": true, "intervalFactor": 2, @@ -12189,7 +12324,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=\u007e\".*\"}[1m]) ))", + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_receive_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", "format": "time_series", "interval": "10s", "intervalFactor": 1, @@ -12199,7 +12334,7 @@ items: "step": 10 }, { - "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=\u007e\".*\"}[1m]) ))", + "expr": "sort_desc(sum by (kubernetes_pod_name) (rate (container_network_transmit_bytes_total{name!=\"\", kubernetes_pod_name=~\".*\"}[1m]) ))", "format": "time_series", "interval": "10s", "intervalFactor": 1, @@ -12380,21 +12515,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(node_load1{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_load1{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "load 1m", "refId": "A" }, { - "expr": "max(node_load5{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_load5{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "load 5m", "refId": "B" }, { - "expr": "max(node_load15{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_load15{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "load 15m", @@ -12408,7 +12543,7 @@ items: "timeShift": null, "title": "System load", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -12485,7 +12620,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", + "expr": "sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{cpu}}", @@ -12499,7 +12634,7 @@ items: "timeShift": null, "title": "Usage Per Core", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -12589,7 +12724,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", + "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", "format": "time_series", "intervalFactor": 10, "legendFormat": "{{ cpu }}", @@ -12603,7 +12738,7 @@ items: "timeShift": null, "title": "CPU Utilizaion", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -12697,7 +12832,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", + "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -12706,6 +12841,9 @@ items: ], "thresholds": "80, 90", "title": "CPU Usage", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -12774,28 +12912,28 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "expr": "max(\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory used", "refId": "A" }, { - "expr": "max(node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory buffers", "refId": "B" }, { - "expr": "max(node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory cached", "refId": "C" }, { - "expr": "max(node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "memory free", @@ -12809,7 +12947,7 @@ items: "timeShift": null, "title": "Memory Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -12903,7 +13041,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -12912,6 +13050,9 @@ items: ], "thresholds": "80, 90", "title": "Memory Usage", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -12987,21 +13128,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "max(rate(node_disk_read_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "read", "refId": "A" }, { - "expr": "max(rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "max(rate(node_disk_written_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "written", "refId": "B" }, { - "expr": "max(rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\"}[2m]))", + "expr": "max(rate(node_disk_io_time_seconds_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "io time", @@ -13015,7 +13156,7 @@ items: "timeShift": null, "title": "Disk I/O", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13092,7 +13233,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "node:node_filesystem_usage:\n", + "expr": "node:node_filesystem_usage:{cluster=\"$cluster\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -13106,7 +13247,7 @@ items: "timeShift": null, "title": "Disk Space Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13196,7 +13337,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", + "expr": "max(rate(node_network_receive_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -13210,7 +13351,7 @@ items: "timeShift": null, "title": "Network Received", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13287,7 +13428,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", + "expr": "max(rate(node_network_transmit_bytes_total{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\", device!~\"lo\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", @@ -13301,7 +13442,7 @@ items: "timeShift": null, "title": "Network Transmitted", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13391,14 +13532,14 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(\n node_filesystem_files{job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{job=\"node-exporter\", instance=\"$instance\"}\n)\n", + "expr": "max(\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "inodes used", "refId": "A" }, { - "expr": "max(node_filesystem_files_free{job=\"node-exporter\", instance=\"$instance\"})", + "expr": "max(node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "inodes free", @@ -13412,7 +13553,7 @@ items: "timeShift": null, "title": "Inodes Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13506,7 +13647,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "max(\n (\n (\n node_filesystem_files{job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", + "expr": "max(\n (\n (\n node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n - node_filesystem_files_free{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_filesystem_files{cluster=\"$cluster\", job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -13515,6 +13656,9 @@ items: ], "thresholds": "80, 90", "title": "Inodes Usage", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -13563,6 +13707,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -13573,7 +13743,7 @@ items: "options": [ ], - "query": "label_values(node_boot_time_seconds{job=\"node-exporter\"}, instance)", + "query": "label_values(node_boot_time_seconds{cluster=\"$cluster\", job=\"node-exporter\"}, instance)", "refresh": 2, "regex": "", "sort": 0, @@ -13698,7 +13868,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} - kubelet_volume_stats_available_bytes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"}) / kubelet_volume_stats_capacity_bytes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", + "expr": "(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} - kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"}) / kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ Usage }}", @@ -13712,7 +13882,7 @@ items: "timeShift": null, "title": "Volume Space Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13802,7 +13972,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "kubelet_volume_stats_inodes_used{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} / kubelet_volume_stats_inodes{job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", + "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} / kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", persistentvolumeclaim=\"$volume\"} * 100\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ Usage }}", @@ -13816,7 +13986,7 @@ items: "timeShift": null, "title": "Volume inodes Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -13886,6 +14056,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kubelet_volume_stats_capacity_bytes, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -13896,7 +14092,7 @@ items: "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, exported_namespace)", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, exported_namespace)", "refresh": 2, "regex": "", "sort": 0, @@ -13922,7 +14118,7 @@ items: "options": [ ], - "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", exported_namespace=\"$namespace\"}, persistentvolumeclaim)", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", exported_namespace=\"$namespace\"}, persistentvolumeclaim)", "refresh": 2, "regex": "", "sort": 0, @@ -14042,25 +14238,26 @@ items: ], "spaceLength": 10, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=\u007e\"$container\", container_name!=\"POD\"})", + "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Current: {{ container_name }}", "refId": "A" }, { - "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", + "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Requested: {{ container }}", "refId": "B" }, { - "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", + "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Limit: {{ container }}", @@ -14074,7 +14271,7 @@ items: "timeShift": null, "title": "Memory Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -14159,11 +14356,12 @@ items: ], "spaceLength": 10, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", + "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ container_name }}", @@ -14177,7 +14375,7 @@ items: "timeShift": null, "title": "CPU Usage", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -14262,11 +14460,12 @@ items: ], "spaceLength": 10, + "span": 12, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", + "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ pod_name }}", @@ -14280,7 +14479,7 @@ items: "timeShift": null, "title": "Network I/O", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -14350,6 +14549,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_pod_info, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -14360,7 +14585,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info, namespace)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)", "refresh": 2, "regex": "", "sort": 0, @@ -14386,7 +14611,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_info{namespace=\u007e\"$namespace\"}, pod)", + "query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)", "refresh": 2, "regex": "", "sort": 0, @@ -14412,7 +14637,7 @@ items: "options": [ ], - "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", + "query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)", "refresh": 2, "regex": "", "sort": 0, @@ -18556,7 +18781,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =\u007e /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -18599,7 +18824,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =\u007e /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "query": "SELECT mean(\"value\")/1000 as \"usage_rate.mean\" FROM \"cpu/usage_rate\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -18642,7 +18867,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\")/1000 as \"CPU LIMIT\" FROM \"cpu/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =\u007e /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "query": "SELECT mean(\"value\")/1000 as \"CPU LIMIT\" FROM \"cpu/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -18771,7 +18996,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =\u007e /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -18814,7 +19039,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =\u007e /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"usage_rate.mean\" FROM \"memory/usage\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -18857,7 +19082,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"CPU LIMIT\" FROM \"memory/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =\u007e /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"CPU LIMIT\" FROM \"memory/limit\" WHERE \"type\" = 'pod_container' AND \"container_name\" =~ /prometheus/ AND $timeFilter GROUP BY container_name, time($__interval) fill(null)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -19002,7 +19227,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_rate.mean \" FROM \"network/rx_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"rx_rate.mean \" FROM \"network/rx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -19044,7 +19269,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_rate.mean\" FROM \"network/rx_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"rx_rate.mean\" FROM \"network/rx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -19086,7 +19311,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean \" FROM \"network/tx_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean \" FROM \"network/tx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -19128,7 +19353,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean\" FROM \"network/tx_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\")*-1 as \"tx_rate.mean\" FROM \"network/tx_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "D", "resultFormat": "time_series", @@ -19170,7 +19395,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "E", "resultFormat": "time_series", @@ -19212,7 +19437,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"rx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "F", "resultFormat": "time_series", @@ -19254,7 +19479,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "G", "resultFormat": "time_series", @@ -19296,7 +19521,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =\u007e /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"tx_errors_rate.mean\" FROM \"network/rx_errors_rate\" WHERE \"labels\" =~ /app:prometheus/ AND $timeFilter AND \"value\">0 GROUP BY labels, time($__interval) fill(null)", "rawQuery": true, "refId": "H", "resultFormat": "time_series", @@ -19425,7 +19650,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -19468,7 +19693,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"DISK_USAGE.mean \" FROM \"filesystem/usage\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -19511,7 +19736,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean \" FROM \"filesystem/limit\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean \" FROM \"filesystem/limit\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -19554,7 +19779,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean\" FROM \"filesystem/limit\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"DISK_LIMIT.mean\" FROM \"filesystem/limit\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "D", "resultFormat": "time_series", @@ -19683,7 +19908,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"FREE_INODES.mean \" FROM \"filesystem/inodes_free\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"FREE_INODES.mean \" FROM \"filesystem/inodes_free\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -19726,7 +19951,7 @@ items: "hide": false, "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"FREE_INODES.mean\" FROM \"filesystem/inodes_free\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"FREE_INODES.mean\" FROM \"filesystem/inodes_free\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -19768,7 +19993,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"USED_INODES.mean \" FROM \"filesystem/inodes\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"USED_INODES.mean \" FROM \"filesystem/inodes\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -19810,7 +20035,7 @@ items: ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") as \"USED_INODES.mean\" FROM \"filesystem/inodes\" WHERE \"labels\" =\u007e /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", + "query": "SELECT mean(\"value\") as \"USED_INODES.mean\" FROM \"filesystem/inodes\" WHERE \"labels\" =~ /data-prometheus:true/ AND $timeFilter GROUP BY time($__interval) fill(null)", "rawQuery": true, "refId": "D", "resultFormat": "time_series", @@ -20295,7 +20520,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20304,6 +20529,9 @@ items: ], "thresholds": "", "title": "CPU", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20375,7 +20603,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}) / 1024^3", + "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20384,6 +20612,9 @@ items: ], "thresholds": "", "title": "Memory", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20455,7 +20686,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\",pod_name=\u007e\"$statefulset.*\"}[3m]))", + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20464,6 +20695,9 @@ items: ], "thresholds": "", "title": "Network", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20550,7 +20784,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20559,6 +20793,9 @@ items: ], "thresholds": "", "title": "Desired Replicas", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20631,7 +20868,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20640,6 +20877,9 @@ items: ], "thresholds": "", "title": "Replicas of current version", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20712,7 +20952,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", + "expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20721,6 +20961,9 @@ items: ], "thresholds": "", "title": "Observed Generation", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20793,7 +21036,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", namespace=\"$namespace\"}) without (instance, pod)", + "expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -20802,6 +21045,9 @@ items: ], "thresholds": "", "title": "Metadata Generation", + "tooltip": { + "shared": false + }, "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -20869,35 +21115,35 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\",namespace=\"$namespace\"}) without (instance, pod)", + "expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "replicas specified", "refId": "A" }, { - "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\",namespace=\"$namespace\"}) without (instance, pod)", + "expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "replicas created", "refId": "B" }, { - "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\",namespace=\"$namespace\"}) without (instance, pod)", + "expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "ready", "refId": "C" }, { - "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\",namespace=\"$namespace\"}) without (instance, pod)", + "expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "replicas of current version", "refId": "D" }, { - "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\",namespace=\"$namespace\"}) without (instance, pod)", + "expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)", "format": "time_series", "intervalFactor": 2, "legendFormat": "updated", @@ -20911,7 +21157,7 @@ items: "timeShift": null, "title": "Replicas", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, @@ -20981,6 +21227,32 @@ items: "allValue": null, "current": { + }, + "datasource": "$datasource", + "hide": 2, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ + + ], + "query": "label_values(kube_statefulset_metadata_generation, cluster)", + "refresh": 2, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ + + ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + }, "datasource": "$datasource", "hide": 0, @@ -21174,7 +21446,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "count(kube_pod_status_ready{namespace=\"$namespace\",condition=\"true\",pod=\u007e\"traefik.*\"})", + "expr": "count(kube_pod_status_ready{namespace=\"$namespace\",condition=\"true\",pod=~\"traefik.*\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -21256,7 +21528,7 @@ items: "tableColumn": "", "targets": [ { - "expr": "time() - max(process_start_time_seconds{job=\u007e\"traefik.*\"})", + "expr": "time() - max(process_start_time_seconds{job=~\"traefik.*\"})", "format": "time_series", "intervalFactor": 2, "refId": "A" @@ -22149,7 +22421,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=\u007e\"2..\"}[5m])) by (method, code)", + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=~\"2..\"}[5m])) by (method, code)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{method}} : {{code}}", @@ -22248,7 +22520,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=\u007e\"5..\"}[5m])) by (method, code)", + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code=~\"5..\"}[5m])) by (method, code)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{method}} : {{code}}", @@ -22449,7 +22721,7 @@ items: "steppedLine": false, "targets": [ { - "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code!\u007e\"2..|5..\"}[5m])) by (method, code)", + "expr": "sum(rate(traefik_backend_requests_total{namespace=\"$namespace\",code!~\"2..|5..\"}[5m])) by (method, code)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{ method }} : {{code}}", @@ -22563,21 +22835,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=\u007e\"traefik-ingress-controller.*\"})", + "expr": "max(container_memory_usage_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Max memory used", "refId": "A" }, { - "expr": "avg(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod_name=\u007e\"traefik-ingress-controller.*\"})", + "expr": "avg(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requested memory usage", "refId": "B" }, { - "expr": "avg(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod_name=\u007e\"traefik-ingress-controller.*\"})", + "expr": "avg(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Limit memory usage", @@ -22675,21 +22947,21 @@ items: "steppedLine": false, "targets": [ { - "expr": "max(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=\u007e\"traefik-ingress-controller.*\"}[1m]))", + "expr": "max(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"}[1m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Max cpu used", "refId": "A" }, { - "expr": "avg(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod_name=\u007e\"traefik-ingress-controller.*\"})", + "expr": "avg(kube_pod_container_resource_requests_cpu_cores{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requested cpu usage", "refId": "B" }, { - "expr": "avg(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod_name=\u007e\"traefik-ingress-controller.*\"})", + "expr": "avg(kube_pod_container_resource_limits_cpu_cores{namespace=\"$namespace\", pod_name=~\"traefik-ingress-controller.*\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Limit cpu usage", diff --git a/manifests/prometheus-adapter-configMap.yaml b/manifests/prometheus-adapter-configMap.yaml index a231de3..d6ebd78 100644 --- a/manifests/prometheus-adapter-configMap.yaml +++ b/manifests/prometheus-adapter-configMap.yaml @@ -4,7 +4,7 @@ data: resourceRules: cpu: containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>) - nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[1m])) by (<<.GroupBy>>) + nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>) resources: overrides: node: @@ -16,7 +16,7 @@ data: containerLabel: container_name memory: containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>) - nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>) + nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>) resources: overrides: node: diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 46c849a..357725f 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -225,21 +225,21 @@ spec: ) record: node:node_memory_swap_io_bytes:sum_rate - expr: | - avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])) + avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) record: :node_disk_utilisation:avg_irate - expr: | avg by (node) ( - irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) + irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info: ) record: node:node_disk_utilisation:avg_irate - expr: | - avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3) + avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3) record: :node_disk_saturation:avg_irate - expr: | avg by (node) ( - irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3 + irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3 * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info: ) @@ -795,9 +795,9 @@ spec: message: API server is returning errors for {{ $value }}% of requests. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod) + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 10 + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3 for: 10m labels: severity: critical @@ -806,9 +806,33 @@ spec: message: API server is returning errors for {{ $value }}% of requests. runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh expr: | - sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod) + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) / - sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 5 + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1 + for: 10m + labels: + severity: warning + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{ $value }}% of requests for + {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 + for: 10m + labels: + severity: critical + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{ $value }}% of requests for + {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh + expr: | + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 for: 10m labels: severity: warning @@ -977,7 +1001,7 @@ spec: log (WAL).' summary: Prometheus write-ahead log is corrupted expr: | - tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0 + prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0 for: 4h labels: severity: warning