Updated vendor libraries

CarlosEDP 2019-02-01 11:52:53 -02:00
parent 53b31ec856
commit 5065d369e1
16 changed files with 164 additions and 46 deletions

View File

@@ -8,7 +8,7 @@
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
}
},
-"version": "9536d7787789b74b692cd8a5482a2801b1aba232"
+"version": "e123a1de479dbb911b4070f1bfcbd1e65e02209e"
},
{
"name": "ksonnet",
@@ -28,7 +28,7 @@
"subdir": ""
}
},
-"version": "4c23c06fff9ef50744f5ed306c9ab0c4bd78a144"
+"version": "668950e4af13f0153fa1d7b58ebe7023b33f2217"
},
{
"name": "grafonnet",
@@ -38,7 +38,7 @@
"subdir": "grafonnet"
}
},
-"version": "eea8b5ba6b8883cf2df5a17c39a42c4b57c0d63e"
+"version": "11022f5e920ac1ea960556193e3f0ab57d70d7c5"
},
{
"name": "grafana-builder",
@@ -48,7 +48,7 @@
"subdir": "grafana-builder"
}
},
-"version": "c6932cf90bce4fef218b4308effc9f15c4219a01"
+"version": "ec3d4f943df01f517a083305666cd1c87bcc7e94"
},
{
"name": "grafana",
@@ -58,7 +58,7 @@
"subdir": "grafana"
}
},
-"version": "da19aef6f5b378fb5281e6f61dbadbbf734d45ee"
+"version": "9ddf5a198b0f7c898dc061158ea427112acbae11"
},
{
"name": "prometheus-operator",
@@ -68,7 +68,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
-"version": "72ec4b9b16ef11700724dc71fec77112536eed40"
+"version": "174c9bf17bec78b055e8e63c6ae3a3dc9bb0a3a8"
},
{
"name": "etcd-mixin",
@@ -78,7 +78,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
-"version": "15b6a17be48dea91a11497980b9adab541add7f0"
+"version": "6070db22ed3d46372a5600fe8f35907f4d706bdb"
}
]
}

View File

@@ -1378,6 +1378,12 @@ spec:
under. This is necessary to generate correct URLs. This is necessary
if Alertmanager is not served from root of a DNS name.
type: string
+image:
+description: Image if specified has precedence over baseImage, tag and
+sha combinations. Specifying the version is still necessary to ensure
+the Prometheus Operator knows what version of Alertmanager is being
+configured.
+type: string
imagePullSecrets:
description: An optional list of references to secrets in the same namespace
to use for pulling prometheus and alertmanager images from registries
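
Note: with this field, an Alertmanager resource can pin one fully qualified image while still declaring its version for the operator. A minimal sketch of how a resource might use it (the image reference below is illustrative, not part of this commit):

    apiVersion: monitoring.coreos.com/v1
    kind: Alertmanager
    metadata:
      name: main
      namespace: monitoring
    spec:
      version: v0.16.0                          # still required so the operator knows what it is running
      image: example.org/alertmanager:v0.16.0   # takes precedence over baseImage/tag/sha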

View File

@@ -1550,6 +1550,12 @@ spec:
under. This is necessary to generate correct URLs. This is necessary
if Prometheus is not served from root of a DNS name.
type: string
+image:
+description: Image if specified has precedence over baseImage, tag and
+sha combinations. Specifying the version is still necessary to ensure
+the Prometheus Operator knows what version of Prometheus is being
+configured.
+type: string
imagePullSecrets:
description: An optional list of references to secrets in the same namespace
to use for pulling prometheus and alertmanager images from registries
@@ -1863,6 +1869,21 @@ spec:
priorityClassName:
description: Priority class assigned to the Pods
type: string
+query:
+description: QuerySpec defines the query command line flags when starting
+Prometheus.
+properties:
+lookbackDelta:
+description: The delta difference allowed for retrieving metrics
+during expression evaluations.
+type: string
+maxConcurrency:
+description: Number of concurrent queries that can be run at once.
+format: int32
+type: integer
+timeout:
+description: Maximum time a query may take before being aborted.
+type: string
remoteRead:
description: If specified, the remote_read spec. This is an experimental
feature, it may change in any upcoming release in a breaking way.
@@ -2943,6 +2964,12 @@ spec:
type: boolean
required:
- key
+image:
+description: Image if specified has precedence over baseImage, tag
+and sha combinations. Specifying the version is still necessary
+to ensure the Prometheus Operator knows what version of Thanos
+is being configured.
+type: string
peers:
description: Peers is a DNS name for Thanos to discover peers through.
type: string
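
Note: the new query section surfaces Prometheus' query-engine flags through the custom resource. A minimal sketch with illustrative values (these presumably map to --query.lookback-delta, --query.max-concurrency and --query.timeout):

    apiVersion: monitoring.coreos.com/v1
    kind: Prometheus
    metadata:
      name: k8s
      namespace: monitoring
    spec:
      query:
        lookbackDelta: 5m     # tolerance when searching back for the latest sample
        maxConcurrency: 20    # concurrent queries allowed
        timeout: 2m           # abort queries that run longer than this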

View File

@@ -4896,6 +4896,12 @@ items:
data:
nodes.json: |-
{
+"__inputs": [
+],
+"__requires": [
+],
"annotations": {
"list": [
@@ -6208,6 +6214,12 @@ items:
data:
persistentvolumesusage.json: |-
{
+"__inputs": [
+],
+"__requires": [
+],
"annotations": {
"list": [
@@ -6551,6 +6563,12 @@ items:
data:
pods.json: |-
{
+"__inputs": [
+],
+"__requires": [
+],
"annotations": {
"list": [
@@ -6730,7 +6748,7 @@ items:
"steppedLine": false,
"targets": [
{
-"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))",
+"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ container_name }}",
@@ -6833,7 +6851,7 @@ items:
"steppedLine": false,
"targets": [
{
-"expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", pod_name=\"$pod\"}[1m])))",
+"expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ pod_name }}",
@@ -7035,6 +7053,12 @@ items:
data:
statefulset.json: |-
{
+"__inputs": [
+],
+"__requires": [
+],
"annotations": {
"list": [

View File

@@ -72,6 +72,8 @@ spec:
- mountPath: /etc/grafana
name: grafana-config
readOnly: false
+nodeSelector:
+beta.kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534

View File

@@ -1,6 +1,8 @@
apiVersion: v1
kind: Service
metadata:
+labels:
+app: grafana
name: grafana
namespace: monitoring
spec:

View File

@@ -0,0 +1,12 @@
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: grafana
+  namespace: monitoring
+spec:
+  endpoints:
+  - interval: 15s
+    port: http
+  selector:
+    matchLabels:
+      app: grafana
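
Note: this ServiceMonitor pairs with the app: grafana label added to the Grafana Service above; selector.matchLabels must match the Service's labels for the operator to generate the scrape configuration. Once both are applied, a quick sanity check in Prometheus (assuming the default job label, which is taken from the Service name):

    up{namespace="monitoring", job="grafana"} == 1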

View File

@@ -67,3 +67,10 @@ rules:
- subjectaccessreviews
verbs:
- create
+- apiGroups:
+- policy
+resources:
+- poddisruptionbudgets
+verbs:
+- list
+- watch
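
Note: assuming this ClusterRole belongs to kube-state-metrics, the new rule supports the poddisruptionbudget collector added in kube-state-metrics v1.5; with list/watch on the policy group, series such as these become available:

    kube_poddisruptionbudget_status_pod_disruptions_allowed
    kube_poddisruptionbudget_status_desired_healthy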

View File

@@ -19,6 +19,7 @@ spec:
- --web.listen-address=127.0.0.1:9100
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
+- --path.rootfs=/host/root
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
image: carlosedp/node_exporter:v0.17.0
@@ -42,7 +43,9 @@ spec:
name: root
readOnly: true
- args:
+- --logtostderr
- --secure-listen-address=$(IP):9100
+- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:9100/
- --upstream=http://127.0.0.1:9100/
env:
- name: IP
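
Note: --path.rootfs tells recent node_exporter releases where the host's root filesystem is mounted inside the container. The surrounding context (name: root, readOnly: true) suggests the DaemonSet already mounts the host root; the pairing presumably looks like this sketch (volume name assumed from the context above):

    volumeMounts:
    - mountPath: /host/root
      name: root
      readOnly: true
    volumes:
    - name: root
      hostPath:
        path: /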

View File

@@ -25,7 +25,7 @@ spec:
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --secure-port=6443
-image: directxman12/k8s-prometheus-adapter-arm64:v0.4.1
+image: carlosedp/k8s-prometheus-adapter:v0.4.1
name: prometheus-adapter
ports:
- containerPort: 6443
@@ -39,6 +39,8 @@ spec:
- mountPath: /etc/adapter
name: config
readOnly: false
+nodeSelector:
+beta.kubernetes.io/os: linux
serviceAccountName: prometheus-adapter
volumes:
- emptyDir: {}

View File

@@ -39,4 +39,17 @@ items:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
+- apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+name: prometheus-k8s
+namespace: logging
+roleRef:
+apiGroup: rbac.authorization.k8s.io
+kind: Role
+name: prometheus-k8s
+subjects:
+- kind: ServiceAccount
+name: prometheus-k8s
+namespace: monitoring
kind: RoleBindingList
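
Note: together with the Role added in the next file, this RoleBinding lets the prometheus-k8s service account discover targets in the logging namespace. One way to verify after applying, using standard kubectl impersonation:

    kubectl auth can-i list endpoints \
      --as=system:serviceaccount:monitoring:prometheus-k8s \
      -n logging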

View File

@@ -9,7 +9,6 @@ items:
- apiGroups:
- ""
resources:
-- nodes
- services
- endpoints
- pods
@@ -26,7 +25,6 @@ items:
- apiGroups:
- ""
resources:
-- nodes
- services
- endpoints
- pods
@@ -43,7 +41,22 @@ items:
- apiGroups:
- ""
resources:
-- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch
+- apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+name: prometheus-k8s
+namespace: logging
+rules:
+- apiGroups:
+- ""
+resources:
+- services
+- endpoints
+- pods

View File

@@ -288,21 +288,24 @@ spec:
record: 'node:node_inodes_free:'
- name: kube-prometheus-node-recording.rules
rules:
-- expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[3m])) BY (instance)
+- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
+(instance)
record: instance:node_cpu:rate:sum
-- expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}))
+- expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}))
BY (instance)
record: instance:node_filesystem_usage:sum
-- expr: sum(rate(node_network_receive_bytes[3m])) BY (instance)
+- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum
-- expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
+- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum
-- expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode)
-/ ON(instance) GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
+- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT
+(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
+BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio
-- expr: sum(rate(node_cpu{mode!="idle",mode!="iowait"}[5m]))
+- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
record: cluster:node_cpu:sum_rate5m
-- expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
+- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
+BY (instance, cpu))
record: cluster:node_cpu:ratio
- name: kubernetes-absent
rules:
@@ -311,7 +314,7 @@
message: Alertmanager has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown
expr: |
-absent(up{job="alertmanager-main"} == 1)
+absent(up{job="alertmanager-main",namespace="monitoring"} == 1)
for: 15m
labels:
severity: critical
@@ -383,7 +386,7 @@
message: Prometheus has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown
expr: |
-absent(up{job="prometheus-k8s"} == 1)
+absent(up{job="prometheus-k8s",namespace="monitoring"} == 1)
for: 15m
labels:
severity: critical
@@ -392,7 +395,7 @@
message: PrometheusOperator has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown
expr: |
-absent(up{job="prometheus-operator"} == 1)
+absent(up{job="prometheus-operator",namespace="monitoring"} == 1)
for: 15m
labels:
severity: critical
@@ -626,8 +629,8 @@
}} for container {{ $labels.container_name }} in pod {{ $labels.pod_name
}}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
-expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{}[5m]))
-by (container_name, pod_name, namespace) \n / \nsum(increase(container_cpu_cfs_periods_total{}[5m]))
+expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!=\"\",
+}[5m])) by (container_name, pod_name, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m]))
by (container_name, pod_name, namespace)\n > 25 \n"
for: 15m
labels:
@@ -773,7 +776,8 @@
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
-message: Kubernetes API certificate is expiring in less than 7 days.
+message: A client certificate used to authenticate to the apiserver is expiring
+in less than 7 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
@@ -781,7 +785,8 @@
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
-message: Kubernetes API certificate is expiring in less than 24 hours.
+message: A client certificate used to authenticate to the apiserver is expiring
+in less than 24 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: |
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
@@ -794,7 +799,7 @@
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
are out of sync.
expr: |
-count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
+count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
for: 5m
labels:
severity: critical
@@ -803,7 +808,7 @@
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
expr: |
-alertmanager_config_last_reload_successful{job="alertmanager-main"} == 0
+alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
for: 10m
labels:
severity: warning
@@ -811,9 +816,9 @@
annotations:
message: Alertmanager has not found all other members of the cluster.
expr: |
-alertmanager_cluster_members{job="alertmanager-main"}
+alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"}
!= on (service) GROUP_LEFT()
-count by (service) (alertmanager_cluster_members{job="alertmanager-main"})
+count by (service) (alertmanager_cluster_members{job="alertmanager-main",namespace="monitoring"})
for: 5m
labels:
severity: critical
@@ -860,7 +865,7 @@
description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
summary: Reloading Prometheus' configuration failed
expr: |
-prometheus_config_last_reload_successful{job="prometheus-k8s"} == 0
+prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"} == 0
for: 10m
labels:
severity: warning
@@ -870,7 +875,7 @@
$labels.pod}}
summary: Prometheus' alert notification queue is running full
expr: |
-predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s"}
+predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}
for: 10m
labels:
severity: warning
@@ -880,7 +885,7 @@
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
summary: Errors while sending alert from Prometheus
expr: |
-rate(prometheus_notifications_errors_total{job="prometheus-k8s"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s"}[5m]) > 0.01
+rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.01
for: 10m
labels:
severity: warning
@@ -890,7 +895,7 @@
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
summary: Errors while sending alerts from Prometheus
expr: |
-rate(prometheus_notifications_errors_total{job="prometheus-k8s"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s"}[5m]) > 0.03
+rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.03
for: 10m
labels:
severity: critical
@@ -900,7 +905,7 @@
to any Alertmanagers
summary: Prometheus is not connected to any Alertmanagers
expr: |
-prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s"} < 1
+prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"} < 1
for: 10m
labels:
severity: warning
@@ -910,7 +915,7 @@
reload failures over the last four hours.'
summary: Prometheus has issues reloading data blocks from disk
expr: |
-increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s"}[2h]) > 0
+increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0
for: 12h
labels:
severity: warning
@@ -920,7 +925,7 @@
compaction failures over the last four hours.'
summary: Prometheus has issues compacting sample blocks
expr: |
-increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s"}[2h]) > 0
+increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0
for: 12h
labels:
severity: warning
@@ -930,7 +935,7 @@
log (WAL).'
summary: Prometheus write-ahead log is corrupted
expr: |
-tsdb_wal_corruptions_total{job="prometheus-k8s"} > 0
+tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
for: 4h
labels:
severity: warning
@@ -940,7 +945,7 @@
samples.
summary: Prometheus isn't ingesting samples
expr: |
-rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s"}[5m]) <= 0
+rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
for: 10m
labels:
severity: warning
@@ -950,7 +955,7 @@
due to duplicate timestamps but different values'
summary: Prometheus has many samples rejected
expr: |
-increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s"}[5m]) > 0
+increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 10m
labels:
severity: warning
@@ -961,7 +966,7 @@
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
}} Namespace.
expr: |
-rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1
+rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
for: 10m
labels:
severity: warning
@@ -969,7 +974,7 @@
annotations:
message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
expr: |
-rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator"}[5m]) > 0.1
+rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
for: 10m
labels:
severity: warning
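
Note: most of the recording-rule churn above tracks node_exporter's v0.16 metric renames (node_cpu → node_cpu_seconds_total, node_filesystem_size → node_filesystem_size_bytes, node_network_receive_bytes → node_network_receive_bytes_total, and so on), which the v0.17.0 image in this commit picks up. A spot check that the renamed series are being ingested:

    count(node_cpu_seconds_total{mode="idle"}) > 0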

View File

@@ -13,3 +13,4 @@ spec:
selector:
app: prometheus
prometheus: k8s
+sessionAffinity: ClientIP
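
Note: sessionAffinity: ClientIP pins a given client to one Prometheus pod behind this Service, keeping query results consistent when replicas (which scrape independently and can differ slightly) are scaled above one. To confirm the field took effect (service name assumed from the selector):

    kubectl -n monitoring get svc prometheus-k8s -o jsonpath='{.spec.sessionAffinity}'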

View File

@@ -10,6 +10,7 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 15s
port: metrics
+jobLabel: k8s-app
namespaceSelector:
matchNames:
- kube-system

View File

@@ -36,7 +36,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
addonResizer: "carlosedp/addon-resizer",
nodeExporter: "carlosedp/node_exporter",
prometheusOperator: "carlosedp/prometheus-operator",
-prometheusAdapter: "directxman12/k8s-prometheus-adapter-arm64",
+prometheusAdapter: "carlosedp/k8s-prometheus-adapter",
grafana: "carlosedp/monitoring-grafana",
configmapReloader: "carlosedp/configmap-reload",
prometheusConfigReloader: "carlosedp/prometheus-config-reloader",
@@ -45,7 +45,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheus+:: {
names: 'k8s',
replicas: 1,
-namespaces: ["default", "kube-system","monitoring"],
+namespaces: ["default", "kube-system","monitoring","logging"],
},
alertmanager+:: {