Revert jsonnet libraries

This commit is contained in:
Carlos de Paula 2020-05-23 20:38:45 -03:00
parent 7c39bd7493
commit 8882eb5e78
21 changed files with 24271 additions and 26118 deletions

View File

@ -18,8 +18,8 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "e007d4f58d49540925e8bac5aa8fedae6bb9d912",
"sum": "pk7mLpdUrHuJKkj2vhD6LGMU7P+oYYooBXAeZyZa398="
"version": "07a74d61cb6c07965c5b594748dc999d1644862b",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
"source": {
@ -28,8 +28,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "28332b410a0359a7eb9362d0af4f978bace70242",
"sum": "cEMmJvhn8dLnLqUVR0ql/XnwY8Jy3HH0YWIQQRaDD0o="
"version": "285624d8fbef01923f7b9772fe2da21c5698a666",
"sum": "npFy3VLHkSDiwUK2DPC9Up4ETD0bMiTpGTXqetH0r4A="
},
{
"source": {
@ -38,8 +38,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "d0a871b710de7b764c05ced98dbd1eb32a681790",
"sum": "cIOKRTNBUOl3a+QsaA/NjClmZAhyVJHlDFReKlXJBAs="
"version": "59bdf55453ba08b4ed7c271cb3c6627058945ed5",
"sum": "qwMbUQkdPhAn9Sl4OVLgzmNOuOTnRLUmvv14I0unsa8="
},
{
"source": {
@ -48,8 +48,8 @@
"subdir": "grafonnet"
}
},
"version": "5736b62831d779e28a8344646aee1f72b1fa1d90",
"sum": "ch97Uqauz7z+9mkOwzRz6JErxgWcQlfuJEEg+XHEadg="
"version": "7a932c9cfc6ccdb1efca9535f165e055949be42a",
"sum": "HbCbHRvgA9a6K5FlOAYOUnErDHnNPWOCYPvDFU++bQE="
},
{
"source": {
@ -58,7 +58,7 @@
"subdir": "grafana-builder"
}
},
"version": "811ccb022bc2bdcd0b8281ed0a0c858c63e20542",
"version": "03da9ea0fc25e621d195fbb218a6bf8593152721",
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
},
{
@ -79,8 +79,8 @@
"subdir": ""
}
},
"version": "2beabb38d3241eb5da5080cbeb648a0cd1e3cbc2",
"sum": "s6t8ntlUHAjnifWx5V1jnBukTLPya7fX7YZVxJ0GcTk="
"version": "ea905d25c01ff4364937a2faed248e5f2f3fdb35",
"sum": "ww9kAgxtpzRC6y7SiVWH7fWJcptBg3Hde50eXmHzN7U="
},
{
"source": {
@ -89,7 +89,7 @@
"subdir": "lib/promgrafonnet"
}
},
"version": "2beabb38d3241eb5da5080cbeb648a0cd1e3cbc2",
"version": "ea905d25c01ff4364937a2faed248e5f2f3fdb35",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{
@ -99,7 +99,7 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "6c2701a703c42da95d9a1ef765c5713832df1858",
"version": "c485728b2e585bd1079e12e462cd7c6fef25f155",
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
},
{
@ -109,8 +109,18 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "6c2701a703c42da95d9a1ef765c5713832df1858",
"sum": "o5avaguRsfFwYFNen00ZEsub1x4i8Z/ZZ2QoEjFMff8="
"version": "c485728b2e585bd1079e12e462cd7c6fef25f155",
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
},
{
"source": {
"git": {
"remote": "https://github.com/metalmatze/slo-libsonnet.git",
"subdir": "slo-libsonnet"
}
},
"version": "5ddd7ffc39e7a54c9aca997c2c389a8046fab0ff",
"sum": "S7/+tnAkzVh8Li7sg7Hu4aeIQAWHCtxhRQ+k1OKjoQk="
},
{
"source": {
@ -119,8 +129,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "b8847b5b3237c18aa25426fccb3d69a9bdff6ee1",
"sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc="
"version": "0107bc794204f50d887898da60032da890637471",
"sum": "VKdF0zPMSCiuIuXWblSz2VOeBaXzQ7fp40vz9sxj+Bo="
},
{
"source": {
@ -129,8 +139,8 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "1f69c38ba4c104520732d416de2894052806cea7",
"sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=",
"version": "012161d90d6a8a6bb930b90601fb89ff6cc3ae60",
"sum": "5EUgr6Spr1zNR8Y2/NevjvEkGV9WMvKo6nEScNER1Lc=",
"name": "prometheus"
}
],

View File

@ -1,5 +1,5 @@
{
"apiVersion": "apiextensions.k8s.io/v1",
"apiVersion": "apiextensions.k8s.io/v1beta1",
"kind": "CustomResourceDefinition",
"metadata": {
"annotations": {
@ -9,6 +9,25 @@
"name": "alertmanagers.monitoring.coreos.com"
},
"spec": {
"additionalPrinterColumns": [
{
"JSONPath": ".spec.version",
"description": "The version of Alertmanager",
"name": "Version",
"type": "string"
},
{
"JSONPath": ".spec.replicas",
"description": "The desired replicas number of Alertmanagers",
"name": "Replicas",
"type": "integer"
},
{
"JSONPath": ".metadata.creationTimestamp",
"name": "Age",
"type": "date"
}
],
"group": "monitoring.coreos.com",
"names": {
"kind": "Alertmanager",
@ -17,29 +36,8 @@
"singular": "alertmanager"
},
"scope": "Namespaced",
"versions": [
{
"additionalPrinterColumns": [
{
"description": "The version of Alertmanager",
"jsonPath": ".spec.version",
"name": "Version",
"type": "string"
},
{
"description": "The desired replicas number of Alertmanagers",
"jsonPath": ".spec.replicas",
"name": "Replicas",
"type": "integer"
},
{
"jsonPath": ".metadata.creationTimestamp",
"name": "Age",
"type": "date"
}
],
"name": "v1",
"schema": {
"subresources": { },
"validation": {
"openAPIV3Schema": {
"description": "Alertmanager describes an Alertmanager cluster.",
"properties": {
@ -546,10 +544,6 @@
"description": "Base image that is used to deploy pods, without tag.",
"type": "string"
},
"clusterAdvertiseAddress": {
"description": "ClusterAdvertiseAddress is the explicit address to advertise in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. [1] RFC1918: https://tools.ietf.org/html/rfc1918",
"type": "string"
},
"configMaps": {
"description": "ConfigMaps is a list of ConfigMaps in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. The ConfigMaps are mounted into /etc/alertmanager/configmaps/<configmap-name>.",
"items": {
@ -1329,15 +1323,15 @@
"description": "The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"properties": {
"gmsaCredentialSpec": {
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field.",
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"gmsaCredentialSpecName": {
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use.",
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"runAsUserName": {
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.",
"type": "string"
}
},
@ -1347,7 +1341,7 @@
"type": "object"
},
"startupProbe": {
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is an alpha feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"properties": {
"exec": {
"description": "One and only one of the following should be specified. Exec specifies the action to take.",
@ -1491,7 +1485,7 @@
"type": "boolean"
},
"volumeDevices": {
"description": "volumeDevices is the list of block devices to be used by the container.",
"description": "volumeDevices is the list of block devices to be used by the container. This is a beta feature.",
"items": {
"description": "volumeDevice describes a mapping of a raw block device within a container.",
"properties": {
@ -2352,15 +2346,15 @@
"description": "The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"properties": {
"gmsaCredentialSpec": {
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field.",
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"gmsaCredentialSpecName": {
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use.",
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"runAsUserName": {
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.",
"type": "string"
}
},
@ -2370,7 +2364,7 @@
"type": "object"
},
"startupProbe": {
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is an alpha feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"properties": {
"exec": {
"description": "One and only one of the following should be specified. Exec specifies the action to take.",
@ -2514,7 +2508,7 @@
"type": "boolean"
},
"volumeDevices": {
"description": "volumeDevices is the list of block devices to be used by the container.",
"description": "volumeDevices is the list of block devices to be used by the container. This is a beta feature.",
"items": {
"description": "volumeDevice describes a mapping of a raw block device within a container.",
"properties": {
@ -2624,10 +2618,6 @@
},
"description": "Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels",
"type": "object"
},
"name": {
"description": "Name must be unique within a namespace. Is required when creating resources, although some resources may allow a client to request the generation of an appropriate name automatically. Name is primarily intended for creation idempotence and configuration definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names",
"type": "string"
}
},
"type": "object"
@ -2688,10 +2678,6 @@
"format": "int64",
"type": "integer"
},
"fsGroupChangePolicy": {
"description": "fsGroupChangePolicy defines behavior of changing ownership and permission of the volume before being exposed inside Pod. This field will only apply to volume types which support fsGroup based ownership(and permissions). It will have no effect on ephemeral volume types such as: secret, configmaps and emptydir. Valid values are \"OnRootMismatch\" and \"Always\". If not specified defaults to \"Always\".",
"type": "string"
},
"runAsGroup": {
"description": "The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container.",
"format": "int64",
@ -2762,15 +2748,15 @@
"description": "The Windows specific settings applied to all containers. If unspecified, the options within a container's SecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"properties": {
"gmsaCredentialSpec": {
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field.",
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"gmsaCredentialSpecName": {
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use.",
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"runAsUserName": {
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.",
"type": "string"
}
},
@ -2790,10 +2776,6 @@
"storage": {
"description": "Storage is the definition of how storage will be used by the Alertmanager instances.",
"properties": {
"disableMountSubPath": {
"description": "Deprecated: subPath usage will be disabled by default in a future release, this option will become unnecessary. DisableMountSubPath allows to remove any subPath usage in volume mounts.",
"type": "boolean"
},
"emptyDir": {
"description": "EmptyDirVolumeSource to be used by the Prometheus StatefulSets. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"properties": {
@ -2820,27 +2802,7 @@
"type": "string"
},
"metadata": {
"description": "EmbeddedMetadata contains metadata relevant to an EmbeddedResource.",
"properties": {
"annotations": {
"additionalProperties": {
"type": "string"
},
"description": "Annotations is an unstructured key value map stored with a resource that may be set by external tools to store and retrieve arbitrary metadata. They are not queryable and should be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations",
"type": "object"
},
"labels": {
"additionalProperties": {
"type": "string"
},
"description": "Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels",
"type": "object"
},
"name": {
"description": "Name must be unique within a namespace. Is required when creating resources, although some resources may allow a client to request the generation of an appropriate name automatically. Name is primarily intended for creation idempotence and configuration definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names",
"type": "string"
}
},
"description": "Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata",
"type": "object"
},
"spec": {
@ -2854,7 +2816,7 @@
"type": "array"
},
"dataSource": {
"description": "This field can be used to specify either: * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot - Beta) * An existing PVC (PersistentVolumeClaim) * An existing custom resource/object that implements data population (Alpha) In order to use VolumeSnapshot object types, the appropriate feature gate must be enabled (VolumeSnapshotDataSource or AnyVolumeDataSource) If the provisioner or an external controller can support the specified data source, it will create a new volume based on the contents of the specified data source. If the specified data source is not supported, the volume will not be created and the failure will be reported as an event. In the future, we plan to support more data source types and the behavior of the provisioner may change.",
"description": "This field requires the VolumeSnapshotDataSource alpha feature gate to be enabled and currently VolumeSnapshot is the only supported data source. If the provisioner can support VolumeSnapshot data source, it will create a new volume and data will be restored to the volume at the same time. If the provisioner does not support VolumeSnapshot data source, volume will not be created and the failure will be reported as an event. In the future, we plan to support more data source types and the behavior of the provisioner may change.",
"properties": {
"apiGroup": {
"description": "APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required.",
@ -2942,7 +2904,7 @@
"type": "string"
},
"volumeMode": {
"description": "volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec.",
"description": "volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec. This is a beta feature.",
"type": "string"
},
"volumeName": {
@ -4238,9 +4200,12 @@
"type": "object"
}
},
"version": "v1",
"versions": [
{
"name": "v1",
"served": true,
"storage": true,
"subresources": { }
"storage": true
}
]
},

View File

@ -1,4 +1,4 @@
apiVersion: apiextensions.k8s.io/v1
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
annotations:
@ -13,9 +13,7 @@ spec:
plural: podmonitors
singular: podmonitor
scope: Namespaced
versions:
- name: v1
schema:
validation:
openAPIV3Schema:
description: PodMonitor defines monitoring for a set of pods.
properties:
@ -55,8 +53,8 @@ spec:
podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor.
items:
description: PodMetricsEndpoint defines a scrapeable endpoint of
a Kubernetes Pod serving Prometheus metrics.
description: PodMetricsEndpoint defines a scrapeable endpoint of a
Kubernetes Pod serving Prometheus metrics.
properties:
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
@ -70,21 +68,20 @@ spec:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before
ingestion.
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
@ -103,8 +100,8 @@ spec:
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
@ -126,7 +123,7 @@ spec:
description: HTTP path to scrape for metrics.
type: string
port:
description: Name of the pod port this endpoint refers to. Mutually
description: Name of the port this endpoint refers to. Mutually
exclusive with targetPort.
type: string
proxyUrl:
@ -138,17 +135,17 @@ spec:
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
@ -167,8 +164,8 @@ spec:
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
@ -189,7 +186,8 @@ spec:
anyOf:
- type: integer
- type: string
description: 'Deprecated: Use ''port'' instead.'
description: Name or number of the target port of the endpoint.
Mutually exclusive with port.
x-kubernetes-int-or-string: true
type: object
type: array
@ -211,25 +209,24 @@ spec:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
description: A label selector requirement is a selector that contains
values, a key, and an operator that relates the key and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
description: operator represents a key's relationship to a
set of values. Valid operators are In, NotIn, Exists and
DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
description: values is an array of string values. If the operator
is In or NotIn, the values array must be non-empty. If the
operator is Exists or DoesNotExist, the values array must
be empty. This array is replaced during a strategic merge
patch.
items:
type: string
type: array
@ -243,8 +240,8 @@ spec:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
of matchExpressions, whose key field is "key", the operator is
"In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
@ -255,6 +252,9 @@ spec:
required:
- spec
type: object
version: v1
versions:
- name: v1
served: true
storage: true
status:

View File

@ -1,5 +1,5 @@
{
"apiVersion": "apiextensions.k8s.io/v1",
"apiVersion": "apiextensions.k8s.io/v1beta1",
"kind": "CustomResourceDefinition",
"metadata": {
"annotations": {
@ -17,10 +17,7 @@
"singular": "prometheusrule"
},
"scope": "Namespaced",
"versions": [
{
"name": "v1",
"schema": {
"validation": {
"openAPIV3Schema": {
"description": "PrometheusRule defines alerting rules for a Prometheus instance",
"properties": {
@ -115,6 +112,10 @@
"type": "object"
}
},
"version": "v1",
"versions": [
{
"name": "v1",
"served": true,
"storage": true
}

View File

@ -1,4 +1,4 @@
apiVersion: apiextensions.k8s.io/v1
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
annotations:
@ -13,9 +13,7 @@ spec:
plural: prometheusrules
singular: prometheusrule
scope: Namespaced
versions:
- name: v1
schema:
validation:
openAPIV3Schema:
description: PrometheusRule defines alerting rules for a Prometheus instance
properties:
@ -38,8 +36,8 @@ spec:
description: Content of Prometheus rule file
items:
description: 'RuleGroup is a list of sequentially evaluated recording
and alerting rules. Note: PartialResponseStrategy is only used
by ThanosRuler and will be ignored by Prometheus instances. Valid
and alerting rules. Note: PartialResponseStrategy is only used by
ThanosRuler and will be ignored by Prometheus instances. Valid
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
properties:
interval:
@ -84,6 +82,9 @@ spec:
required:
- spec
type: object
version: v1
versions:
- name: v1
served: true
storage: true
status:

View File

@ -1,5 +1,5 @@
{
"apiVersion": "apiextensions.k8s.io/v1",
"apiVersion": "apiextensions.k8s.io/v1beta1",
"kind": "CustomResourceDefinition",
"metadata": {
"annotations": {
@ -17,10 +17,7 @@
"singular": "servicemonitor"
},
"scope": "Namespaced",
"versions": [
{
"name": "v1",
"schema": {
"validation": {
"openAPIV3Schema": {
"description": "ServiceMonitor defines monitoring for a set of services.",
"properties": {
@ -251,7 +248,7 @@
"type": "string"
}
],
"description": "Name or number of the pod port this endpoint refers to. Mutually exclusive with port.",
"description": "Name or number of the target port of the endpoint. Mutually exclusive with port.",
"x-kubernetes-int-or-string": true
},
"tlsConfig": {
@ -498,6 +495,10 @@
"type": "object"
}
},
"version": "v1",
"versions": [
{
"name": "v1",
"served": true,
"storage": true
}

View File

@ -1,4 +1,4 @@
apiVersion: apiextensions.k8s.io/v1
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
annotations:
@ -13,9 +13,7 @@ spec:
plural: servicemonitors
singular: servicemonitor
scope: Namespaced
versions:
- name: v1
schema:
validation:
openAPIV3Schema:
description: ServiceMonitor defines monitoring for a set of services.
properties:
@ -46,8 +44,8 @@ spec:
basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
properties:
password:
description: The secret in the service monitor namespace
that contains the password for authentication.
description: The secret in the service monitor namespace that
contains the password for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -65,8 +63,8 @@ spec:
- key
type: object
username:
description: The secret in the service monitor namespace
that contains the username for authentication.
description: The secret in the service monitor namespace that
contains the username for authentication.
properties:
key:
description: The key of the secret to select from. Must
@ -93,16 +91,16 @@ spec:
service monitor and accessible by the Prometheus Operator.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
@ -119,21 +117,20 @@ spec:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before
ingestion.
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
@ -152,8 +149,8 @@ spec:
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
@ -187,17 +184,17 @@ spec:
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source
label values.
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
@ -216,8 +213,8 @@ spec:
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
@ -238,15 +235,14 @@ spec:
anyOf:
- type: integer
- type: string
description: Name or number of the pod port this endpoint refers
to. Mutually exclusive with port.
description: Name or number of the target port of the endpoint.
Mutually exclusive with port.
x-kubernetes-int-or-string: true
tlsConfig:
description: TLS configuration to use when scraping the endpoint
properties:
ca:
description: Stuct containing the CA cert to use for the
targets.
description: Stuct containing the CA cert to use for the targets.
properties:
configMap:
description: ConfigMap containing data to use for the
@ -292,8 +288,8 @@ spec:
to use for the targets.
type: string
cert:
description: Struct containing the client cert file for
the targets.
description: Struct containing the client cert file for the
targets.
properties:
configMap:
description: ConfigMap containing data to use for the
@ -405,25 +401,24 @@ spec:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that
contains values, a key, and an operator that relates the key
and values.
description: A label selector requirement is a selector that contains
values, a key, and an operator that relates the key and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
description: operator represents a key's relationship to a
set of values. Valid operators are In, NotIn, Exists and
DoesNotExist.
type: string
values:
description: values is an array of string values. If the
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
description: values is an array of string values. If the operator
is In or NotIn, the values array must be non-empty. If the
operator is Exists or DoesNotExist, the values array must
be empty. This array is replaced during a strategic merge
patch.
items:
type: string
type: array
@ -437,8 +432,8 @@ spec:
type: string
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
of matchExpressions, whose key field is "key", the operator is
"In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
@ -455,6 +450,9 @@ spec:
required:
- spec
type: object
version: v1
versions:
- name: v1
served: true
storage: true
status:

View File

@ -1,5 +1,5 @@
{
"apiVersion": "apiextensions.k8s.io/v1",
"apiVersion": "apiextensions.k8s.io/v1beta1",
"kind": "CustomResourceDefinition",
"metadata": {
"annotations": {
@ -17,10 +17,7 @@
"singular": "thanosruler"
},
"scope": "Namespaced",
"versions": [
{
"name": "v1",
"schema": {
"validation": {
"openAPIV3Schema": {
"description": "ThanosRuler defines a ThanosRuler deployment.",
"properties": {
@ -523,10 +520,6 @@
},
"type": "array"
},
"alertQueryUrl": {
"description": "The external Query URL the Thanos Ruler will set in the 'Source' field of all alerts. Maps to the '--alert.query-url' CLI arg.",
"type": "string"
},
"alertmanagersConfig": {
"description": "Define configuration for connecting to alertmanager. Only available with thanos v0.10.0 and higher. Maps to the `alertmanagers.config` arg.",
"properties": {
@ -549,12 +542,9 @@
"type": "object"
},
"alertmanagersUrl": {
"description": "Define URLs to send alerts to Alertmanager. For Thanos v0.10.0 and higher, AlertManagersConfig should be used instead. Note: this field will be ignored if AlertManagersConfig is specified. Maps to the `alertmanagers.url` arg.",
"items": {
"description": "Define URL to send alerts to alertmanager. For Thanos v0.10.0 and higher, AlertManagersConfig should be used instead. Maps to the `alertmanagers.url` arg.",
"type": "string"
},
"type": "array"
},
"containers": {
"description": "Containers allows injecting additional containers or modifying operator generated containers. This can be used to allow adding an authentication proxy to a ThanosRuler pod or to change the behavior of an operator generated container. Containers described here modify an operator generated container if they share the same name and modifications are done via a strategic merge patch. The current container names are: `thanos-ruler` and `rules-configmap-reloader`. Overriding containers is entirely outside the scope of what the maintainers will support and by doing so, you accept that this behaviour may break at any time without notice.",
"items": {
@ -1323,15 +1313,15 @@
"description": "The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"properties": {
"gmsaCredentialSpec": {
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field.",
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"gmsaCredentialSpecName": {
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use.",
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"runAsUserName": {
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.",
"type": "string"
}
},
@ -1341,7 +1331,7 @@
"type": "object"
},
"startupProbe": {
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is an alpha feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"properties": {
"exec": {
"description": "One and only one of the following should be specified. Exec specifies the action to take.",
@ -1485,7 +1475,7 @@
"type": "boolean"
},
"volumeDevices": {
"description": "volumeDevices is the list of block devices to be used by the container.",
"description": "volumeDevices is the list of block devices to be used by the container. This is a beta feature.",
"items": {
"description": "volumeDevice describes a mapping of a raw block device within a container.",
"properties": {
@ -1568,149 +1558,6 @@
"description": "The external URL the Thanos Ruler instances will be available under. This is necessary to generate correct URLs. This is necessary if Thanos Ruler is not served from root of a DNS name.",
"type": "string"
},
"grpcServerTlsConfig": {
"description": "GRPCServerTLSConfig configures the gRPC server from which Thanos Querier reads recorded rule data. Note: Currently only the CAFile, CertFile, and KeyFile fields are supported. Maps to the '--grpc-server-tls-*' CLI args.",
"properties": {
"ca": {
"description": "Stuct containing the CA cert to use for the targets.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
"properties": {
"key": {
"description": "The key to select.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the ConfigMap or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object"
},
"secret": {
"description": "Secret containing data to use for the targets.",
"properties": {
"key": {
"description": "The key of the secret to select from. Must be a valid secret key.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the Secret or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object"
}
},
"type": "object"
},
"caFile": {
"description": "Path to the CA cert in the Prometheus container to use for the targets.",
"type": "string"
},
"cert": {
"description": "Struct containing the client cert file for the targets.",
"properties": {
"configMap": {
"description": "ConfigMap containing data to use for the targets.",
"properties": {
"key": {
"description": "The key to select.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the ConfigMap or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object"
},
"secret": {
"description": "Secret containing data to use for the targets.",
"properties": {
"key": {
"description": "The key of the secret to select from. Must be a valid secret key.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the Secret or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object"
}
},
"type": "object"
},
"certFile": {
"description": "Path to the client cert file in the Prometheus container for the targets.",
"type": "string"
},
"insecureSkipVerify": {
"description": "Disable target certificate validation.",
"type": "boolean"
},
"keyFile": {
"description": "Path to the client key file in the Prometheus container for the targets.",
"type": "string"
},
"keySecret": {
"description": "Secret containing the client key file for the targets.",
"properties": {
"key": {
"description": "The key of the secret to select from. Must be a valid secret key.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the Secret or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object"
},
"serverName": {
"description": "Used to verify the hostname for the targets.",
"type": "string"
}
},
"type": "object"
},
"image": {
"description": "Thanos container image URL.",
"type": "string"
@ -2497,15 +2344,15 @@
"description": "The Windows specific settings applied to all containers. If unspecified, the options from the PodSecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"properties": {
"gmsaCredentialSpec": {
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field.",
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"gmsaCredentialSpecName": {
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use.",
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"runAsUserName": {
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.",
"type": "string"
}
},
@ -2515,7 +2362,7 @@
"type": "object"
},
"startupProbe": {
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is a beta feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"description": "StartupProbe indicates that the Pod has successfully initialized. If specified, no other probes are executed until this completes successfully. If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, when it might take a long time to load data or warm a cache, than during steady-state operation. This cannot be updated. This is an alpha feature enabled by the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
"properties": {
"exec": {
"description": "One and only one of the following should be specified. Exec specifies the action to take.",
@ -2659,7 +2506,7 @@
"type": "boolean"
},
"volumeDevices": {
"description": "volumeDevices is the list of block devices to be used by the container.",
"description": "volumeDevices is the list of block devices to be used by the container. This is a beta feature.",
"items": {
"description": "volumeDevice describes a mapping of a raw block device within a container.",
"properties": {
@ -2797,10 +2644,6 @@
},
"description": "Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels",
"type": "object"
},
"name": {
"description": "Name must be unique within a namespace. Is required when creating resources, although some resources may allow a client to request the generation of an appropriate name automatically. Name is primarily intended for creation idempotence and configuration definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names",
"type": "string"
}
},
"type": "object"
@ -2813,27 +2656,6 @@
"description": "Priority class assigned to the Pods",
"type": "string"
},
"queryConfig": {
"description": "Define configuration for connecting to thanos query instances. If this is defined, the QueryEndpoints field will be ignored. Maps to the `query.config` CLI argument. Only available with thanos v0.11.0 and higher.",
"properties": {
"key": {
"description": "The key of the secret to select from. Must be a valid secret key.",
"type": "string"
},
"name": {
"description": "Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?",
"type": "string"
},
"optional": {
"description": "Specify whether the Secret or its key must be defined",
"type": "boolean"
}
},
"required": [
"key"
],
"type": "object"
},
"queryEndpoints": {
"description": "QueryEndpoints defines Thanos querier endpoints from which to query metrics. Maps to the --query flag of thanos ruler.",
"items": {
@ -2966,10 +2788,6 @@
"format": "int64",
"type": "integer"
},
"fsGroupChangePolicy": {
"description": "fsGroupChangePolicy defines behavior of changing ownership and permission of the volume before being exposed inside Pod. This field will only apply to volume types which support fsGroup based ownership(and permissions). It will have no effect on ephemeral volume types such as: secret, configmaps and emptydir. Valid values are \"OnRootMismatch\" and \"Always\". If not specified defaults to \"Always\".",
"type": "string"
},
"runAsGroup": {
"description": "The GID to run the entrypoint of the container process. Uses runtime default if unset. May also be set in SecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence for that container.",
"format": "int64",
@ -3040,15 +2858,15 @@
"description": "The Windows specific settings applied to all containers. If unspecified, the options within a container's SecurityContext will be used. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"properties": {
"gmsaCredentialSpec": {
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field.",
"description": "GMSACredentialSpec is where the GMSA admission webhook (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the GMSA credential spec named by the GMSACredentialSpecName field. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"gmsaCredentialSpecName": {
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use.",
"description": "GMSACredentialSpecName is the name of the GMSA credential spec to use. This field is alpha-level and is only honored by servers that enable the WindowsGMSA feature flag.",
"type": "string"
},
"runAsUserName": {
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence.",
"description": "The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. This field is beta-level and may be disabled with the WindowsRunAsUserName feature flag.",
"type": "string"
}
},
@ -3064,10 +2882,6 @@
"storage": {
"description": "Storage spec to specify how storage shall be used.",
"properties": {
"disableMountSubPath": {
"description": "Deprecated: subPath usage will be disabled by default in a future release, this option will become unnecessary. DisableMountSubPath allows to remove any subPath usage in volume mounts.",
"type": "boolean"
},
"emptyDir": {
"description": "EmptyDirVolumeSource to be used by the Prometheus StatefulSets. If specified, used in place of any volumeClaimTemplate. More info: https://kubernetes.io/docs/concepts/storage/volumes/#emptydir",
"properties": {
@ -3094,27 +2908,7 @@
"type": "string"
},
"metadata": {
"description": "EmbeddedMetadata contains metadata relevant to an EmbeddedResource.",
"properties": {
"annotations": {
"additionalProperties": {
"type": "string"
},
"description": "Annotations is an unstructured key value map stored with a resource that may be set by external tools to store and retrieve arbitrary metadata. They are not queryable and should be preserved when modifying objects. More info: http://kubernetes.io/docs/user-guide/annotations",
"type": "object"
},
"labels": {
"additionalProperties": {
"type": "string"
},
"description": "Map of string keys and values that can be used to organize and categorize (scope and select) objects. May match selectors of replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels",
"type": "object"
},
"name": {
"description": "Name must be unique within a namespace. Is required when creating resources, although some resources may allow a client to request the generation of an appropriate name automatically. Name is primarily intended for creation idempotence and configuration definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names",
"type": "string"
}
},
"description": "Standard object's metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata",
"type": "object"
},
"spec": {
@ -3128,7 +2922,7 @@
"type": "array"
},
"dataSource": {
"description": "This field can be used to specify either: * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot - Beta) * An existing PVC (PersistentVolumeClaim) * An existing custom resource/object that implements data population (Alpha) In order to use VolumeSnapshot object types, the appropriate feature gate must be enabled (VolumeSnapshotDataSource or AnyVolumeDataSource) If the provisioner or an external controller can support the specified data source, it will create a new volume based on the contents of the specified data source. If the specified data source is not supported, the volume will not be created and the failure will be reported as an event. In the future, we plan to support more data source types and the behavior of the provisioner may change.",
"description": "This field requires the VolumeSnapshotDataSource alpha feature gate to be enabled and currently VolumeSnapshot is the only supported data source. If the provisioner can support VolumeSnapshot data source, it will create a new volume and data will be restored to the volume at the same time. If the provisioner does not support VolumeSnapshot data source, volume will not be created and the failure will be reported as an event. In the future, we plan to support more data source types and the behavior of the provisioner may change.",
"properties": {
"apiGroup": {
"description": "APIGroup is the group for the resource being referenced. If APIGroup is not specified, the specified Kind must be in the core API group. For any other third-party types, APIGroup is required.",
@ -3216,7 +3010,7 @@
"type": "string"
},
"volumeMode": {
"description": "volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec.",
"description": "volumeMode defines what type of volume is required by the claim. Value of Filesystem is implied when not included in claim spec. This is a beta feature.",
"type": "string"
},
"volumeName": {
@ -4441,6 +4235,9 @@
"type": "array"
}
},
"required": [
"queryEndpoints"
],
"type": "object"
},
"status": {
@ -4487,6 +4284,10 @@
"type": "object"
}
},
"version": "v1",
"versions": [
{
"name": "v1",
"served": true,
"storage": true
}

View File

@ -7,6 +7,26 @@ metadata:
app.kubernetes.io/version: v0.37.0
name: prometheus-operator
rules:
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- create
- apiGroups:
- apiextensions.k8s.io
resourceNames:
- alertmanagers.monitoring.coreos.com
- podmonitors.monitoring.coreos.com
- prometheuses.monitoring.coreos.com
- prometheusrules.monitoring.coreos.com
- servicemonitors.monitoring.coreos.com
- thanosrulers.monitoring.coreos.com
resources:
- customresourcedefinitions
verbs:
- get
- update
- apiGroups:
- monitoring.coreos.com
resources:
@ -67,15 +87,3 @@ rules:
- get
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create

View File

@ -40,18 +40,6 @@ spec:
memory: 100Mi
securityContext:
allowPrivilegeEscalation: false
- args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:8080/
image: carlosedp/kube-rbac-proxy:v0.5.0
name: kube-rbac-proxy
ports:
- containerPort: 8443
name: https
securityContext:
runAsUser: 65534
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:

View File

@ -10,9 +10,9 @@ metadata:
spec:
clusterIP: None
ports:
- name: https
port: 8443
targetPort: https
- name: http
port: 8080
targetPort: http
selector:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator

File diff suppressed because it is too large Load Diff

View File

@ -2,19 +2,17 @@ apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/name: node-exporter
app.kubernetes.io/version: v0.18.1
app: node-exporter
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
app.kubernetes.io/name: node-exporter
app: node-exporter
template:
metadata:
labels:
app.kubernetes.io/name: node-exporter
app.kubernetes.io/version: v0.18.1
app: node-exporter
spec:
containers:
- args:
@ -24,7 +22,8 @@ spec:
- --path.rootfs=/host/root
- --no-collector.wifi
- --no-collector.hwmon
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
image: prom/node-exporter:v0.18.1
name: node-exporter
resources:

View File

@ -2,8 +2,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/name: node-exporter
app.kubernetes.io/version: v0.18.1
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
@ -13,4 +12,4 @@ spec:
port: 9100
targetPort: https
selector:
app.kubernetes.io/name: node-exporter
app: node-exporter

View File

@ -2,8 +2,7 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/name: node-exporter
app.kubernetes.io/version: v0.18.1
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
@ -21,7 +20,7 @@ spec:
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: app.kubernetes.io/name
jobLabel: k8s-app
selector:
matchLabels:
app.kubernetes.io/name: node-exporter
k8s-app: node-exporter

View File

@ -1,32 +1,32 @@
apiVersion: v1
data:
config.yaml: |-
"resourceRules":
"cpu":
"containerLabel": "container"
"containerQuery": "sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}[5m])) by (<<.GroupBy>>)"
"nodeQuery": "sum(1 - irate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)"
"resources":
"overrides":
"namespace":
"resource": "namespace"
"node":
"resource": "node"
"pod":
"resource": "pod"
"memory":
"containerLabel": "container"
"containerQuery": "sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!=\"POD\",container!=\"\",pod!=\"\"}) by (<<.GroupBy>>)"
"nodeQuery": "sum(node_memory_MemTotal_bytes{job=\"node-exporter\",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job=\"node-exporter\",<<.LabelMatchers>>}) by (<<.GroupBy>>)"
"resources":
"overrides":
"instance":
"resource": "node"
"namespace":
"resource": "namespace"
"pod":
"resource": "pod"
"window": "5m"
config.yaml: |
resourceRules:
cpu:
containerQuery: sum(irate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - irate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod:
resource: pod
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
instance:
resource: node
namespace:
resource: namespace
pod:
resource: pod
containerLabel: container
window: 5m
kind: ConfigMap
metadata:
name: adapter-config

View File

@ -9,12 +9,8 @@ metadata:
namespace: monitoring
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
port: https
scheme: https
tlsConfig:
insecureSkipVerify: true
- honorLabels: true
port: http
selector:
matchLabels:
app.kubernetes.io/component: controller

View File

@ -65,289 +65,122 @@ spec:
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate1m
- name: kube-apiserver.rules
- name: kube-apiserver-error
rules:
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[5m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate5m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[30m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
labels:
job: apiserver
record: status_class:apiserver_request_total:rate30m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[2h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate2h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[6h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate6h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1d
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[3d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate3d
- expr: |
sum(status_class:apiserver_request_total:rate5m{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
sum(status_class:apiserver_request_total:rate5m{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate1d
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate5m
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
)
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
)
sum(status_class:apiserver_request_total:rate30m{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
sum(status_class:apiserver_request_total:rate30m{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate1h
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate30m
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
)
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
)
sum(status_class:apiserver_request_total:rate1h{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
sum(status_class:apiserver_request_total:rate1h{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate2h
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1h
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
)
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
)
sum(status_class:apiserver_request_total:rate2h{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
sum(status_class:apiserver_request_total:rate2h{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate30m
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate2h
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
)
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
)
sum(status_class:apiserver_request_total:rate6h{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
sum(status_class:apiserver_request_total:rate6h{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate3d
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate6h
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
)
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
)
sum(status_class:apiserver_request_total:rate1d{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
sum(status_class:apiserver_request_total:rate1d{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate5m
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1d
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
-
(
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
)
)
+
# errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
)
sum(status_class:apiserver_request_total:rate3d{job="apiserver",status_class="5xx"})
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
sum(status_class:apiserver_request_total:rate3d{job="apiserver"})
labels:
verb: read
record: apiserver_request:burnrate6h
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
labels:
verb: write
record: apiserver_request:burnrate1d
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
labels:
verb: write
record: apiserver_request:burnrate1h
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
labels:
verb: write
record: apiserver_request:burnrate2h
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
labels:
verb: write
record: apiserver_request:burnrate30m
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
labels:
verb: write
record: apiserver_request:burnrate3d
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
record: apiserver_request:burnrate5m
- expr: |
(
(
# too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
-
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
)
+
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
)
/
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
labels:
verb: write
record: apiserver_request:burnrate6h
- expr: |
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels:
verb: read
record: code_resource:apiserver_request_total:rate5m
- expr: |
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels:
verb: write
record: code_resource:apiserver_request_total:rate5m
- expr: |
histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
labels:
quantile: "0.99"
verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
labels:
quantile: "0.99"
verb: write
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate3d
- name: kube-apiserver.rules
rules:
- expr: |
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT"}[5m])) without(instance, pod)
/
@ -368,153 +201,6 @@ spec:
labels:
quantile: "0.5"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- interval: 3m
name: kube-apiserver-availability.rules
rules:
- expr: |
1 - (
(
# write too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
-
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
) +
(
# read too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
-
(
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
)
) +
# errors
sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
)
/
sum(code:apiserver_request_total:increase30d)
labels:
verb: all
record: apiserver_request:availability30d
- expr: |
1 - (
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
-
(
# too slow
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
)
+
# errors
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
)
/
sum(code:apiserver_request_total:increase30d{verb="read"})
labels:
verb: read
record: apiserver_request:availability30d
- expr: |
1 - (
(
# too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
-
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
)
+
# errors
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
)
/
sum(code:apiserver_request_total:increase30d{verb="write"})
labels:
verb: write
record: apiserver_request:availability30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
labels:
verb: read
record: code:apiserver_request_total:increase30d
- expr: |
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
labels:
verb: write
record: code:apiserver_request_total:increase30d
- name: k8s.rules
rules:
- expr: |
@ -524,31 +210,31 @@ spec:
sum by (cluster, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
1, max by(cluster, namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- expr: |
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info{node!=""})
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_working_set_bytes
- expr: |
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info{node!=""})
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_rss
- expr: |
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info{node!=""})
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_cache
- expr: |
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
max by(namespace, pod, node) (kube_pod_info{node!=""})
max by(namespace, pod, node) (kube_pod_info)
)
record: node_namespace_pod_container:container_memory_swap
- expr: |
@ -663,12 +349,12 @@ spec:
- name: node.rules
rules:
- expr: |
sum(min(kube_pod_info{node!=""}) by (cluster, node))
sum(min(kube_pod_info) by (cluster, node))
record: ':kube_pod_info_node_count:'
- expr: |
topk by(namespace, pod) (1,
max by (node, namespace, pod) (
label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)")
label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
))
record: 'node_namespace_pod:kube_pod_info:'
- expr: |
@ -919,56 +605,6 @@ spec:
for: 1h
labels:
severity: warning
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr: |
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
labels:
severity: warning
- alert: NodeTextFileCollectorScrapeError
annotations:
description: Node Exporter text file collector failed to scrape.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
expr: |
node_textfile_scrape_error{job="node-exporter"} == 1
labels:
severity: warning
- alert: NodeClockSkewDetected
annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
Ensure NTP is configured correctly on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
summary: Clock skew detected.
expr: |
(
node_timex_offset_seconds > 0.05
and
deriv(node_timex_offset_seconds[5m]) >= 0
)
or
(
node_timex_offset_seconds < -0.05
and
deriv(node_timex_offset_seconds[5m]) <= 0
)
for: 10m
labels:
severity: warning
- alert: NodeClockNotSynchronising
annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
is configured on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
summary: Clock not synchronising.
expr: |
min_over_time(node_timex_sync_status[5m]) == 0
for: 10m
labels:
severity: warning
- name: kubernetes-apps
rules:
- alert: KubePodCrashLooping
@ -977,26 +613,20 @@ spec:
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
for: 15m
labels:
severity: warning
severity: critical
- alert: KubePodNotReady
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (
max by(namespace, pod) (
kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}
) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (
1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
)
) > 0
sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeDeploymentGenerationMismatch
annotations:
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
@ -1009,7 +639,7 @@ spec:
kube_deployment_metadata_generation{job="kube-state-metrics"}
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeDeploymentReplicasMismatch
annotations:
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
@ -1027,7 +657,7 @@ spec:
)
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeStatefulSetReplicasMismatch
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
@ -1045,7 +675,7 @@ spec:
)
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeStatefulSetGenerationMismatch
annotations:
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
@ -1058,7 +688,7 @@ spec:
kube_statefulset_metadata_generation{job="kube-state-metrics"}
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
@ -1078,7 +708,7 @@ spec:
)
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeDaemonSetRolloutStuck
annotations:
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
@ -1090,7 +720,7 @@ spec:
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
for: 15m
labels:
severity: warning
severity: critical
- alert: KubeContainerWaiting
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
@ -1120,7 +750,7 @@ spec:
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
expr: |
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
for: 15m
for: 10m
labels:
severity: warning
- alert: KubeCronJobRunning
@ -1194,11 +824,11 @@ spec:
for: 5m
labels:
severity: warning
- alert: KubeMemoryOvercommit
- alert: KubeMemOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Pods and cannot
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: |
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})
/
@ -1210,10 +840,10 @@ spec:
for: 5m
labels:
severity: warning
- alert: KubeCPUQuotaOvercommit
- alert: KubeCPUOvercommit
annotations:
message: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
/
@ -1222,10 +852,10 @@ spec:
for: 5m
labels:
severity: warning
- alert: KubeMemoryQuotaOvercommit
- alert: KubeMemOvercommit
annotations:
message: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/
@ -1263,12 +893,12 @@ spec:
severity: warning
- name: kubernetes-storage
rules:
- alert: KubePersistentVolumeFillingUp
- alert: KubePersistentVolumeUsageCritical
annotations:
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
expr: |
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
/
@ -1277,12 +907,12 @@ spec:
for: 1m
labels:
severity: critical
- alert: KubePersistentVolumeFillingUp
- alert: KubePersistentVolumeFullInFourDays
annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
days. Currently {{ $value | humanizePercentage }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
expr: |
(
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
@ -1293,7 +923,7 @@ spec:
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
for: 1h
labels:
severity: warning
severity: critical
- alert: KubePersistentVolumeErrors
annotations:
message: The persistent volume {{ $labels.persistentvolume }} has status {{
@ -1329,60 +959,48 @@ spec:
for: 15m
labels:
severity: warning
- name: kube-apiserver-slos
- name: kube-apiserver-error-alerts
rules:
- alert: KubeAPIErrorBudgetBurn
- alert: ErrorBudgetBurn
annotations:
message: The API server is burning too much error budget
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
message: 'High requests error budget burn for job=apiserver (current value:
{{ $value }})'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
expr: |
sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)
(
status_class_5xx:apiserver_request_total:ratio_rate1h{job="apiserver"} > (14.4*0.010000)
and
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
for: 2m
status_class_5xx:apiserver_request_total:ratio_rate5m{job="apiserver"} > (14.4*0.010000)
)
or
(
status_class_5xx:apiserver_request_total:ratio_rate6h{job="apiserver"} > (6*0.010000)
and
status_class_5xx:apiserver_request_total:ratio_rate30m{job="apiserver"} > (6*0.010000)
)
labels:
long: 1h
job: apiserver
severity: critical
short: 5m
- alert: KubeAPIErrorBudgetBurn
- alert: ErrorBudgetBurn
annotations:
message: The API server is burning too much error budget
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
message: 'High requests error budget burn for job=apiserver (current value:
{{ $value }})'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
expr: |
sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)
(
status_class_5xx:apiserver_request_total:ratio_rate1d{job="apiserver"} > (3*0.010000)
and
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
for: 15m
labels:
long: 6h
severity: critical
short: 30m
- alert: KubeAPIErrorBudgetBurn
annotations:
message: The API server is burning too much error budget
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
expr: |
sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)
status_class_5xx:apiserver_request_total:ratio_rate2h{job="apiserver"} > (3*0.010000)
)
or
(
status_class_5xx:apiserver_request_total:ratio_rate3d{job="apiserver"} > (0.010000)
and
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
for: 1h
status_class_5xx:apiserver_request_total:ratio_rate6h{job="apiserver"} > (0.010000)
)
labels:
long: 1d
job: apiserver
severity: warning
short: 2h
- alert: KubeAPIErrorBudgetBurn
annotations:
message: The API server is burning too much error budget
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn
expr: |
sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)
and
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
for: 3h
labels:
long: 3d
severity: warning
short: 6h
- name: kubernetes-system-apiserver
rules:
- alert: KubeAPILatencyHigh
@ -1409,6 +1027,29 @@ spec:
for: 5m
labels:
severity: warning
- alert: KubeAPILatencyHigh
annotations:
message: The API server has a 99th percentile latency of {{ $value }} seconds
for {{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} > 4
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value | humanizePercentage
@ -1486,7 +1127,8 @@ spec:
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
expr: |
(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1
kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1
for: 2m
labels:
severity: warning
- alert: KubeletTooManyPods
@ -1525,7 +1167,7 @@ spec:
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
expr: |
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"} > 60
histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet", metrics_path="/metrics"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60
for: 15m
labels:
severity: warning
@ -1690,8 +1332,8 @@ spec:
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
$labels.url }}
{{ printf "%.1f" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue
}}{{ else }}{{ $labels.url }}{{ end }}.
summary: Prometheus fails to send samples to remote storage.
expr: |
(
@ -1711,8 +1353,8 @@ spec:
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
}}.
is {{ printf "%.1f" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue
}}{{ else }}{{ $labels.url }}{{ end }}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
@ -1729,9 +1371,8 @@ spec:
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
desired shards calculation wants to run {{ $value }} shards for queue {{
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
desired shards calculation wants to run {{ $value }} shards, which is more
than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
@ -1818,6 +1459,17 @@ spec:
expr: vector(1)
labels:
severity: none
- name: node-time
rules:
- alert: ClockSkewDetected
annotations:
message: Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod
}}. Ensure NTP is configured correctly on this host.
expr: |
abs(node_timex_offset_seconds{job="node-exporter"}) > 0.05
for: 2m
labels:
severity: warning
- name: node-network
rules:
- alert: NodeNetworkInterfaceFlapping