Update image and lib versions

This commit is contained in:
CarlosEDP 2019-08-08 17:09:53 -03:00
parent 23af384d74
commit 7375469d1c
33 changed files with 10588 additions and 416 deletions

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local vars = import 'vars.jsonnet';
{

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {

View File

@ -1,29 +1,29 @@
{
_config+:: {
versions+:: {
prometheus: 'v2.7.0',
alertmanager: 'v0.16.0',
kubeStateMetrics: 'v1.5.0',
prometheus: 'v2.11.1',
alertmanager: 'v0.18.0',
kubeStateMetrics: 'v1.7.2',
kubeRbacProxy: 'v0.4.1',
addonResizer: 'v1.8.4',
nodeExporter: 'v0.17.0',
prometheusOperator: 'v0.28.0',
prometheusAdapter: 'v0.4.1',
grafana: '5.4.3',
nodeExporter: 'v0.18.1',
prometheusOperator: 'v0.31.1',
prometheusAdapter: 'v0.5.0',
grafana: '6.3.2',
configmapReloader: 'v0.2.2',
prometheusConfigReloader: 'v0.28.0',
prometheusConfigReloader: 'v0.31.1',
armExporter: 'latest',
smtpServer: 'v1.0.1',
elasticExporter: '1.0.4rc1',
},
imageRepos+:: {
prometheus: 'carlosedp/prometheus',
alertmanager: 'carlosedp/alertmanager',
prometheus: 'prom/prometheus',
alertmanager: 'prom/alertmanager',
kubeStateMetrics: 'carlosedp/kube-state-metrics',
kubeRbacProxy: 'carlosedp/kube-rbac-proxy',
addonResizer: 'carlosedp/addon-resizer',
nodeExporter: 'carlosedp/node_exporter',
nodeExporter: 'prom/node-exporter',
prometheusOperator: 'carlosedp/prometheus-operator',
prometheusAdapter: 'carlosedp/k8s-prometheus-adapter',
grafana: 'grafana/grafana',

View File

@ -8,7 +8,7 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "3a64636eb7ee2fe4ee5e5b8bc78822d405a41a1c"
"version": "4adb70b017e9a4ecb884a636dfef6fcae7d4bed8"
},
{
"name": "ksonnet",
@ -18,7 +18,7 @@
"subdir": ""
}
},
"version": "d03da231d6c8bd74437b74a1e9e8b966f13dffa2"
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f"
},
{
"name": "kubernetes-mixin",
@ -28,7 +28,7 @@
"subdir": ""
}
},
"version": "99ceb3cfbd00c9d86e3f2c6bbbce604781dd0f82"
"version": "0afc72e70df6048c6b65fd3e4968e53b0812b30c"
},
{
"name": "grafonnet",
@ -38,7 +38,7 @@
"subdir": "grafonnet"
}
},
"version": "a6896d19aedc46ecf80dd64967191b9fd6f75f45"
"version": "69bc267211790a1c3f4ea6e6211f3e8ffe22f987"
},
{
"name": "grafana-builder",
@ -48,7 +48,7 @@
"subdir": "grafana-builder"
}
},
"version": "6c6f20cbef5018affdea9757db63fb574d3f3bf6"
"version": "3c44dfa9bfe2b66985733d4b16e0afd29094b4a0"
},
{
"name": "grafana",
@ -58,7 +58,7 @@
"subdir": "grafana"
}
},
"version": "b6db6bdbdc8d7f2f8834a8044897ea6322a0f6ad"
"version": "c27d2792764867cdaf6484f067cc875cb8aef2f6"
},
{
"name": "prometheus-operator",
@ -68,7 +68,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "7a25bf6b6bb2347dacb235659b73bc210117acc7"
"version": "6efd4e5e12213021516c10b3ebd0699260ddd804"
},
{
"name": "etcd-mixin",
@ -78,7 +78,17 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "4d6ebafa54ac72308f40e07dacc1fd803ceccab5"
"version": "43ce2eefaa0a4bdd5c1e825ff08a32e6e46f3343"
},
{
"name": "prometheus",
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "ff40de7ca6084f5aab1f3971025c00c217615589"
}
]
}

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local vars = import 'vars.jsonnet';
local join_objects(objs) =

View File

@ -94,6 +94,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchFields:
description: A list of node selector requirements
@ -127,7 +128,9 @@ spec:
required:
- key
- operator
type: object
type: array
type: object
weight:
description: Weight associated with matching the corresponding
nodeSelectorTerm, in the range 1-100.
@ -136,6 +139,7 @@ spec:
required:
- weight
- preference
type: object
type: array
requiredDuringSchedulingIgnoredDuringExecution:
description: A node selector represents the union of the results
@ -184,6 +188,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchFields:
description: A list of node selector requirements
@ -217,10 +222,14 @@ spec:
required:
- key
- operator
type: object
type: array
type: object
type: array
required:
- nodeSelectorTerms
type: object
type: object
podAffinity:
description: Pod affinity is a group of inter pod affinity scheduling
rules.
@ -287,6 +296,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value}
@ -296,6 +306,7 @@ spec:
and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces
the labelSelector applies to (matches against);
@ -314,6 +325,7 @@ spec:
type: string
required:
- topologyKey
type: object
weight:
description: weight associated with matching the corresponding
podAffinityTerm, in the range 1-100.
@ -322,6 +334,7 @@ spec:
required:
- weight
- podAffinityTerm
type: object
type: array
requiredDuringSchedulingIgnoredDuringExecution:
description: If the affinity requirements specified by this
@ -378,6 +391,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value} pairs.
@ -386,6 +400,7 @@ spec:
is "key", the operator is "In", and the values array
contains only "value". The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces the
labelSelector applies to (matches against); null or
@ -404,7 +419,9 @@ spec:
type: string
required:
- topologyKey
type: object
type: array
type: object
podAntiAffinity:
description: Pod anti affinity is a group of inter pod anti affinity
scheduling rules.
@ -472,6 +489,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value}
@ -481,6 +499,7 @@ spec:
and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces
the labelSelector applies to (matches against);
@ -499,6 +518,7 @@ spec:
type: string
required:
- topologyKey
type: object
weight:
description: weight associated with matching the corresponding
podAffinityTerm, in the range 1-100.
@ -507,6 +527,7 @@ spec:
required:
- weight
- podAffinityTerm
type: object
type: array
requiredDuringSchedulingIgnoredDuringExecution:
description: If the anti-affinity requirements specified by
@ -563,6 +584,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value} pairs.
@ -571,6 +593,7 @@ spec:
is "key", the operator is "In", and the values array
contains only "value". The requirements are ANDed.
type: object
type: object
namespaces:
description: namespaces specifies which namespaces the
labelSelector applies to (matches against); null or
@ -589,7 +612,10 @@ spec:
type: string
required:
- topologyKey
type: object
type: array
type: object
type: object
baseImage:
description: Base image that is used to deploy pods, without tag.
type: string
@ -672,6 +698,7 @@ spec:
type: boolean
required:
- key
type: object
fieldRef:
description: ObjectFieldSelector selects an APIVersioned
field of an object.
@ -686,6 +713,7 @@ spec:
type: string
required:
- fieldPath
type: object
resourceFieldRef:
description: ResourceFieldSelector represents container
resources (cpu, memory) and their output format
@ -700,6 +728,7 @@ spec:
type: string
required:
- resource
type: object
secretKeyRef:
description: SecretKeySelector selects a key of a Secret.
properties:
@ -716,8 +745,11 @@ spec:
type: boolean
required:
- key
type: object
type: object
required:
- name
type: object
type: array
envFrom:
description: List of sources to populate environment variables
@ -743,6 +775,7 @@ spec:
optional:
description: Specify whether the ConfigMap must be defined
type: boolean
type: object
prefix:
description: An optional identifier to prepend to each key
in the ConfigMap. Must be a C_IDENTIFIER.
@ -759,6 +792,8 @@ spec:
optional:
description: Specify whether the Secret must be defined
type: boolean
type: object
type: object
type: array
image:
description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images
@ -798,6 +833,7 @@ spec:
items:
type: string
type: array
type: object
httpGet:
description: HTTPGetAction describes an action based on
HTTP Get requests.
@ -823,6 +859,7 @@ spec:
required:
- name
- value
type: object
type: array
path:
description: Path to access on the HTTP server.
@ -837,6 +874,7 @@ spec:
type: string
required:
- port
type: object
tcpSocket:
description: TCPSocketAction describes an action based
on opening a socket
@ -851,6 +889,8 @@ spec:
- type: integer
required:
- port
type: object
type: object
preStop:
description: Handler defines a specific action that should
be taken
@ -871,6 +911,7 @@ spec:
items:
type: string
type: array
type: object
httpGet:
description: HTTPGetAction describes an action based on
HTTP Get requests.
@ -896,6 +937,7 @@ spec:
required:
- name
- value
type: object
type: array
path:
description: Path to access on the HTTP server.
@ -910,6 +952,7 @@ spec:
type: string
required:
- port
type: object
tcpSocket:
description: TCPSocketAction describes an action based
on opening a socket
@ -924,6 +967,9 @@ spec:
- type: integer
required:
- port
type: object
type: object
type: object
livenessProbe:
description: Probe describes a health check to be performed against
a container to determine whether it is alive or ready to receive
@ -944,6 +990,7 @@ spec:
items:
type: string
type: array
type: object
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
@ -975,6 +1022,7 @@ spec:
required:
- name
- value
type: object
type: array
path:
description: Path to access on the HTTP server.
@ -989,6 +1037,7 @@ spec:
type: string
required:
- port
type: object
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
@ -1019,12 +1068,14 @@ spec:
- type: integer
required:
- port
type: object
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
type: object
name:
description: Name of the container specified as a DNS_LABEL. Each
container in a pod must have a unique name (DNS_LABEL). Cannot
@ -1069,6 +1120,7 @@ spec:
type: string
required:
- containerPort
type: object
type: array
readinessProbe:
description: Probe describes a health check to be performed against
@ -1090,6 +1142,7 @@ spec:
items:
type: string
type: array
type: object
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
@ -1121,6 +1174,7 @@ spec:
required:
- name
- value
type: object
type: array
path:
description: Path to access on the HTTP server.
@ -1135,6 +1189,7 @@ spec:
type: string
required:
- port
type: object
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
@ -1165,12 +1220,14 @@ spec:
- type: integer
required:
- port
type: object
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
type: object
resources:
description: ResourceRequirements describes the compute resource
requirements.
@ -1185,6 +1242,7 @@ spec:
it defaults to Limits if that is explicitly specified, otherwise
to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
type: object
securityContext:
description: SecurityContext holds security configuration that
will be applied to a container. Some fields are present in both
@ -1213,6 +1271,7 @@ spec:
items:
type: string
type: array
type: object
privileged:
description: Run container in privileged mode. Processes in
privileged containers are essentially equivalent to root
@ -1274,6 +1333,8 @@ spec:
description: User is a SELinux user label that applies
to the container.
type: string
type: object
type: object
stdin:
description: Whether this container should allocate a buffer for
stdin in the container runtime. If this is not set, reads from
@ -1331,6 +1392,7 @@ spec:
required:
- name
- devicePath
type: object
type: array
volumeMounts:
description: Pod volumes to mount into the container's filesystem.
@ -1360,9 +1422,18 @@ spec:
description: Path within the volume from which the container's
volume should be mounted. Defaults to "" (volume's root).
type: string
subPathExpr:
description: Expanded path within the volume from which
the container's volume should be mounted. Behaves similarly
to SubPath but environment variable references $(VAR_NAME)
are expanded using the container's environment. Defaults
to "" (volume's root). SubPathExpr and SubPath are mutually
exclusive. This field is alpha in 1.14.
type: string
required:
- name
- mountPath
type: object
type: array
workingDir:
description: Container's working directory. If not specified,
@ -1371,6 +1442,7 @@ spec:
type: string
required:
- name
type: object
type: array
externalUrl:
description: The external URL the Alertmanager instances will be available
@ -1394,12 +1466,16 @@ spec:
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
type: object
type: array
listenLocal:
description: ListenLocal makes the Alertmanager server listen on loopback,
so that it does not bind against the Pod IP. Note this is only for
the Alertmanager UI, not the gossip communication.
type: boolean
logFormat:
description: Log format for Alertmanager to be configured with.
type: string
logLevel:
description: Log level for Alertmanager to be configured with.
type: string
@ -1485,6 +1561,7 @@ spec:
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that don't return
@ -1537,6 +1614,7 @@ spec:
the cause of the error. If this value is empty
there is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource associated
@ -1566,6 +1644,7 @@ spec:
single resource which can be described). More info:
http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing the REST
resource this object represents. Servers may infer this
@ -1607,6 +1686,7 @@ spec:
description: selfLink is a URL representing this object.
Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why this
operation is in the "Failure" status. If this value is
@ -1617,14 +1697,54 @@ spec:
description: 'Status of the operation. One of: "Success"
or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be used to
organize and categorize (scope and select) objects. May match
selectors of replication controllers and services. More info:
http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet and
the group version of the resource that the fieldset applies
to.
properties:
apiVersion:
description: APIVersion defines the version of this resource
that this field set applies to. The format is "group/version"
just like the top-level APIVersion field. It is necessary
to track the version of a field set because it cannot be
automatically converted.
type: string
fields:
description: 'Fields stores a set of fields in a data structure
like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
type: object
manager:
description: Manager is an identifier of the workflow managing
these fields.
type: string
operation:
description: Operation is the type of operation which led
to this ManagedFieldsEntry being created. The only valid
values for this field are 'Apply' and 'Update'.
type: string
time:
description: Time is a wrapper around time.Time which supports
correct marshaling to YAML and JSON. Wrappers are provided
for many of the factory methods that the time package offers.
format: date-time
type: string
type: object
type: array
name:
description: 'Name must be unique within a namespace. Is required
when creating resources, although some resources may allow a client
@ -1678,6 +1798,7 @@ spec:
- kind
- name
- uid
type: object
type: array
resourceVersion:
description: |-
@ -1695,6 +1816,7 @@ spec:
Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids
type: string
type: object
priorityClassName:
description: Priority class assigned to the Pods
type: string
@ -1717,6 +1839,7 @@ spec:
to Limits if that is explicitly specified, otherwise to an implementation-defined
value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
type: object
retention:
description: Time duration Alertmanager shall retain data for. Default
is '120h', and must match the regular expression `[0-9]+(ms|s|m|h)`
@ -1795,6 +1918,7 @@ spec:
description: User is a SELinux user label that applies to the
container.
type: string
type: object
supplementalGroups:
description: A list of groups applied to the first process run in
each container, in addition to the container's primary GID. If
@ -1819,7 +1943,9 @@ spec:
required:
- name
- value
type: object
type: array
type: object
serviceAccountName:
description: ServiceAccountName is the name of the ServiceAccount to
use to run the Prometheus Pods.
@ -1846,6 +1972,7 @@ spec:
Must be an empty string (default) or Memory. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir'
type: string
sizeLimit: {}
type: object
volumeClaimTemplate:
description: PersistentVolumeClaim is a user's request for and claim
to a persistent volume
@ -1943,6 +2070,7 @@ spec:
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that
@ -1998,6 +2126,7 @@ spec:
of the cause of the error. If this value
is empty there is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource
@ -2028,6 +2157,7 @@ spec:
is a single resource which can be described).
More info: http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing
the REST resource this object represents. Servers
@ -2074,6 +2204,7 @@ spec:
description: selfLink is a URL representing
this object. Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why
this operation is in the "Failure" status. If
@ -2085,14 +2216,57 @@ spec:
description: 'Status of the operation. One of: "Success"
or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be
used to organize and categorize (scope and select) objects.
May match selectors of replication controllers and services.
More info: http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet
and the group version of the resource that the fieldset
applies to.
properties:
apiVersion:
description: APIVersion defines the version of this
resource that this field set applies to. The format
is "group/version" just like the top-level APIVersion
field. It is necessary to track the version of a
field set because it cannot be automatically converted.
type: string
fields:
description: 'Fields stores a set of fields in a data
structure like a Trie. To understand how this is
used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
type: object
manager:
description: Manager is an identifier of the workflow
managing these fields.
type: string
operation:
description: Operation is the type of operation which
led to this ManagedFieldsEntry being created. The
only valid values for this field are 'Apply' and
'Update'.
type: string
time:
description: Time is a wrapper around time.Time which
supports correct marshaling to YAML and JSON. Wrappers
are provided for many of the factory methods that
the time package offers.
format: date-time
type: string
type: object
type: array
name:
description: 'Name must be unique within a namespace. Is
required when creating resources, although some resources
@ -2149,6 +2323,7 @@ spec:
- kind
- name
- uid
type: object
type: array
resourceVersion:
description: |-
@ -2166,6 +2341,7 @@ spec:
Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids
type: string
type: object
spec:
description: PersistentVolumeClaimSpec describes the common
attributes of storage devices and allows a Source for provider-specific
@ -2197,6 +2373,7 @@ spec:
required:
- kind
- name
type: object
resources:
description: ResourceRequirements describes the compute
resource requirements.
@ -2212,6 +2389,7 @@ spec:
explicitly specified, otherwise to an implementation-defined
value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
type: object
type: object
selector:
description: A label selector is a label query over a set
of resources. The result of matchLabels and matchExpressions
@ -2248,6 +2426,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value} pairs.
@ -2256,6 +2435,7 @@ spec:
is "key", the operator is "In", and the values array
contains only "value". The requirements are ANDed.
type: object
type: object
storageClassName:
description: 'Name of the StorageClass required by the claim.
More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1'
@ -2269,6 +2449,7 @@ spec:
description: VolumeName is the binding reference to the
PersistentVolume backing this claim.
type: string
type: object
status:
description: PersistentVolumeClaimStatus is the current status
of a persistent volume claim.
@ -2323,10 +2504,14 @@ spec:
required:
- type
- status
type: object
type: array
phase:
description: Phase represents the current phase of PersistentVolumeClaim.
type: string
type: object
type: object
type: object
tag:
description: Tag of Alertmanager container image to be deployed. Defaults
to the value of `version`. Version is ignored if Tag is set.
@ -2369,10 +2554,12 @@ spec:
If the operator is Exists, the value should be empty, otherwise
just a regular string.
type: string
type: object
type: array
version:
description: Version the cluster should be on.
type: string
type: object
status:
description: 'AlertmanagerStatus is the most recent observed status of the
Alertmanager cluster. Read-only. Not included when requesting from the
@ -2408,4 +2595,6 @@ spec:
- updatedReplicas
- availableReplicas
- unavailableReplicas
type: object
type: object
version: v1

View File

@ -0,0 +1,235 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
kind: PodMonitor
plural: podmonitors
scope: Namespaced
validation:
openAPIV3Schema:
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
spec:
description: PodMonitorSpec contains specification parameters for a PodMonitor.
properties:
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
description: NamespaceSelector is a selector for selecting either all
namespaces or a list of namespaces.
properties:
any:
description: Boolean describing whether all namespaces are selected
in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor.
items:
description: PodMetricsEndpoint defines a scrapeable endpoint of a
Kubernetes Pod serving Prometheus metrics.
properties:
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
params:
description: Optional HTTP URL parameters
type: object
path:
description: HTTP path to scrape for metrics.
type: string
port:
description: Name of the port this endpoint refers to. Mutually
exclusive with targetPort.
type: string
proxyUrl:
description: ProxyURL eg http://proxyserver:2195 Directs scrapes
to proxy through this endpoint.
type: string
relabelings:
description: 'RelabelConfigs to apply to samples before ingestion.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
scheme:
description: HTTP scheme to use for scraping.
type: string
scrapeTimeout:
description: Timeout after which the scrape is ended
type: string
targetPort:
anyOf:
- type: string
- type: integer
type: object
type: array
podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod
onto the target.
items:
type: string
type: array
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted.
format: int64
type: integer
selector:
description: A label selector is a label query over a set of resources.
The result of matchLabels and matchExpressions are ANDed. An empty
label selector matches all objects. A null label selector matches
no objects.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that contains
values, a key, and an operator that relates the key and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to a
set of values. Valid operators are In, NotIn, Exists and
DoesNotExist.
type: string
values:
description: values is an array of string values. If the operator
is In or NotIn, the values array must be non-empty. If the
operator is Exists or DoesNotExist, the values array must
be empty. This array is replaced during a strategic merge
patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator is
"In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
required:
- podMetricsEndpoints
- selector
type: object
type: object
version: v1

View File

@ -96,6 +96,7 @@ spec:
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that don't return
@ -148,6 +149,7 @@ spec:
cause of the error. If this value is empty there
is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource associated
@ -175,6 +177,7 @@ spec:
description: 'UID of the resource. (when there is a single
resource which can be described). More info: http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing the REST resource
this object represents. Servers may infer this from the endpoint
@ -215,6 +218,7 @@ spec:
description: selfLink is a URL representing this object.
Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why this operation
is in the "Failure" status. If this value is empty there is
@ -225,13 +229,52 @@ spec:
description: 'Status of the operation. One of: "Success" or
"Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be used to organize
and categorize (scope and select) objects. May match selectors of
replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet and the
group version of the resource that the fieldset applies to.
properties:
apiVersion:
description: APIVersion defines the version of this resource that
this field set applies to. The format is "group/version" just
like the top-level APIVersion field. It is necessary to track
the version of a field set because it cannot be automatically
converted.
type: string
fields:
description: 'Fields stores a set of fields in a data structure
like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
type: object
manager:
description: Manager is an identifier of the workflow managing
these fields.
type: string
operation:
description: Operation is the type of operation which led to
this ManagedFieldsEntry being created. The only valid values
for this field are 'Apply' and 'Update'.
type: string
time:
description: Time is a wrapper around time.Time which supports
correct marshaling to YAML and JSON. Wrappers are provided
for many of the factory methods that the time package offers.
format: date-time
type: string
type: object
type: array
name:
description: 'Name must be unique within a namespace. Is required when
creating resources, although some resources may allow a client to
@ -284,6 +327,7 @@ spec:
- kind
- name
- uid
type: object
type: array
resourceVersion:
description: |-
@ -301,6 +345,7 @@ spec:
Populated by the system. Read-only. More info: http://kubernetes.io/docs/user-guide/identifiers#uids
type: string
type: object
spec:
description: PrometheusRuleSpec contains specification parameters for a
Rule.
@ -335,9 +380,13 @@ spec:
type: string
required:
- expr
type: object
type: array
required:
- name
- rules
type: object
type: array
type: object
type: object
version: v1

View File

@ -52,6 +52,7 @@ spec:
type: boolean
required:
- key
type: object
username:
description: SecretKeySelector selects a key of a Secret.
properties:
@ -68,6 +69,8 @@ spec:
type: boolean
required:
- key
type: object
type: object
bearerTokenFile:
description: File to read bearer token for scraping targets.
type: string
@ -121,6 +124,7 @@ spec:
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
params:
description: Optional HTTP URL parameters
@ -180,6 +184,7 @@ spec:
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
scheme:
description: HTTP scheme to use for scraping.
@ -209,6 +214,8 @@ spec:
serverName:
description: Used to verify the hostname for the targets.
type: string
type: object
type: object
type: array
jobLabel:
description: The label to use to retrieve the job name from.
@ -226,6 +233,7 @@ spec:
items:
type: string
type: array
type: object
podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod
onto the target.
@ -271,6 +279,7 @@ spec:
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value} pairs. A single
@ -279,6 +288,7 @@ spec:
"In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
targetLabels:
description: TargetLabels transfers labels on the Kubernetes Service
onto the target.
@ -288,4 +298,6 @@ spec:
required:
- endpoints
- selector
type: object
type: object
version: v1

View File

@ -1,6 +1,10 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
name: prometheus-operator
rules:
- apiGroups:
@ -17,6 +21,7 @@ rules:
- prometheuses/finalizers
- alertmanagers/finalizers
- servicemonitors
- podmonitors
- prometheusrules
verbs:
- '*'

View File

@ -1,6 +1,10 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io

View File

@ -1,19 +1,24 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
name: prometheus-operator
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
template:
metadata:
labels:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
spec:
containers:
- args:
@ -35,7 +40,6 @@ spec:
memory: 100Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:

View File

@ -2,7 +2,9 @@ apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
name: prometheus-operator
namespace: monitoring
spec:
@ -12,4 +14,5 @@ spec:
port: 8080
targetPort: http
selector:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator

View File

@ -1,5 +1,9 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
name: prometheus-operator
namespace: monitoring

View File

@ -2,7 +2,9 @@ apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0
name: prometheus-operator
namespace: monitoring
spec:
@ -11,4 +13,6 @@ spec:
port: http
selector:
matchLabels:
k8s-app: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.28.0

View File

@ -22,7 +22,7 @@ spec:
weight: 100
baseImage: carlosedp/alertmanager
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
replicas: 1
securityContext:
fsGroup: 2000

File diff suppressed because it is too large Load Diff

View File

@ -42,6 +42,12 @@ spec:
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/coredns-dashboard
name: grafana-dashboard-coredns-dashboard
readOnly: false
@ -66,6 +72,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/kubernetes-cluster-dashboard
name: grafana-dashboard-kubernetes-cluster-dashboard
readOnly: false
@ -81,6 +90,18 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/prometheus-dashboard
name: grafana-dashboard-prometheus-dashboard
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus
name: grafana-dashboard-prometheus
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/proxy
name: grafana-dashboard-proxy
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
@ -102,6 +123,12 @@ spec:
- configMap:
name: grafana-dashboards
name: grafana-dashboards
- configMap:
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
- configMap:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
name: grafana-dashboard-coredns-dashboard
name: grafana-dashboard-coredns-dashboard
@ -126,6 +153,9 @@ spec:
- configMap:
name: grafana-dashboard-k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- configMap:
name: grafana-dashboard-kubernetes-cluster-dashboard
name: grafana-dashboard-kubernetes-cluster-dashboard
@ -141,6 +171,18 @@ spec:
- configMap:
name: grafana-dashboard-prometheus-dashboard
name: grafana-dashboard-prometheus-dashboard
- configMap:
name: grafana-dashboard-prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
- configMap:
name: grafana-dashboard-prometheus
name: grafana-dashboard-prometheus
- configMap:
name: grafana-dashboard-proxy
name: grafana-dashboard-proxy
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset

View File

@ -27,6 +27,7 @@ rules:
- daemonsets
- deployments
- replicasets
- ingresses
verbs:
- list
- watch
@ -74,3 +75,17 @@ rules:
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
verbs:
- list
- watch

View File

@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
@ -94,7 +94,7 @@ spec:
cpu: 10m
memory: 30Mi
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534

View File

@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
@ -61,23 +61,20 @@ spec:
resources:
limits:
cpu: 20m
memory: 40Mi
memory: 60Mi
requests:
cpu: 10m
memory: 20Mi
hostNetwork: true
hostPID: true
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534
serviceAccountName: node-exporter
tolerations:
- effect: NoExecute
operator: Exists
- effect: NoSchedule
operator: Exists
- operator: Exists
volumes:
- hostPath:
path: /proc

View File

@ -10,6 +10,13 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: instance
scheme: https
tlsConfig:
insecureSkipVerify: true

View File

@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-adapter
@ -40,7 +40,7 @@ spec:
name: config
readOnly: false
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
serviceAccountName: prometheus-adapter
volumes:
- emptyDir: {}

View File

@ -28,7 +28,8 @@ spec:
baseImage: carlosedp/prometheus
externalUrl: http://prometheus.192.168.99.100.nip.io
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
podMonitorSelector: {}
replicas: 1
resources:
requests:

View File

@ -11,44 +11,37 @@ spec:
- name: k8s.rules
rules:
- expr: |
sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- expr: |
sum by (namespace, pod_name, container_name) (
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
sum by (namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])
)
record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
record: namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- expr: |
sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: |
sum by (namespace, label_name) (
sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace, pod_name)
* on (namespace, pod_name) group_left(label_name)
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
sum(container_memory_usage_bytes{job="kubelet",image!="", container!="POD"}) by (pod, namespace)
* on (namespace, pod)
group_left(label_name) kube_pod_labels{job="kube-state-metrics"}
)
record: namespace_name:container_cpu_usage_seconds_total:sum_rate
- expr: |
sum by (namespace, label_name) (
sum(container_memory_usage_bytes{job="kubelet",image!="", container_name!=""}) by (pod_name, namespace)
* on (namespace, pod_name) group_left(label_name)
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
)
record: namespace_name:container_memory_usage_bytes:sum
record: namespace:container_memory_usage_bytes:sum
- expr: |
sum by (namespace, label_name) (
sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
* on (namespace, pod) group_left(label_name)
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
* on (namespace, pod)
group_left(label_name) kube_pod_labels{job="kube-state-metrics"}
)
record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
record: namespace:kube_pod_container_resource_requests_memory_bytes:sum
- expr: |
sum by (namespace, label_name) (
sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
* on (namespace, pod) group_left(label_name)
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
* on (namespace, pod)
group_left(label_name) kube_pod_labels{job="kube-state-metrics"}
)
record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
- expr: |
sum(
label_replace(
@ -85,67 +78,67 @@ spec:
- name: kube-scheduler.rules
rules:
- expr: |
histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.99"
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.99"
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.99"
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.9"
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.9"
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.9, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.9"
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.5"
record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
record: cluster_quantile:scheduler_e2e_scheduling_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.5"
record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.5, sum(rate(scheduler_binding_duration_seconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod))
labels:
quantile: "0.5"
record: cluster_quantile:scheduler_binding_latency:histogram_quantile
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- name: kube-apiserver.rules
rules:
- expr: |
histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
labels:
quantile: "0.99"
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
labels:
quantile: "0.9"
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
labels:
quantile: "0.5"
record: cluster_quantile:apiserver_request_latencies:histogram_quantile
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- name: node.rules
rules:
- expr: sum(min(kube_pod_info) by (node))
@ -462,7 +455,7 @@ spec:
state for longer than an hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) > 0
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"}) > 0
for: 1h
labels:
severity: critical
@ -607,11 +600,11 @@ spec:
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: |
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum)
/
sum(node:node_num_cpu:sum)
sum(kube_node_status_allocatable_cpu_cores)
>
(count(node:node_num_cpu:sum)-1) / count(node:node_num_cpu:sum)
(count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)
for: 5m
labels:
severity: warning
@ -621,13 +614,13 @@ spec:
tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: |
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum)
/
sum(node_memory_MemTotal_bytes)
sum(kube_node_status_allocatable_memory_bytes)
>
(count(node:node_num_cpu:sum)-1)
(count(kube_node_status_allocatable_memory_bytes)-1)
/
count(node:node_num_cpu:sum)
count(kube_node_status_allocatable_memory_bytes)
for: 5m
labels:
severity: warning
@ -638,7 +631,7 @@ spec:
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
/
sum(node:node_num_cpu:sum)
sum(kube_node_status_allocatable_cpu_cores)
> 1.5
for: 5m
labels:
@ -650,7 +643,7 @@ spec:
expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/
sum(node_memory_MemTotal_bytes{job="node-exporter"})
sum(kube_node_status_allocatable_memory_bytes{job="kube-state-metrics"})
> 1.5
for: 5m
labels:
@ -671,12 +664,11 @@ spec:
- alert: CPUThrottlingHigh
annotations:
message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace
}} for container {{ $labels.container_name }} in pod {{ $labels.pod_name
}}.'
}} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!=\"\",
}[5m])) by (container_name, pod_name, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m]))
by (container_name, pod_name, namespace)\n > 25 \n"
expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\",
}[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m]))
by (container, pod, namespace)\n > 25 \n"
for: 15m
labels:
severity: warning
@ -740,7 +732,7 @@ spec:
components running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!="kube-dns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
for: 1h
labels:
severity: warning
@ -783,7 +775,7 @@ spec:
for {{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
for: 10m
labels:
severity: warning
@ -793,7 +785,7 @@ spec:
for {{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
for: 10m
labels:
severity: critical
@ -802,9 +794,9 @@ spec:
message: API server is returning errors for {{ $value }}% of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 3
for: 10m
labels:
severity: critical
@ -813,9 +805,9 @@ spec:
message: API server is returning errors for {{ $value }}% of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 1
for: 10m
labels:
severity: warning
@ -825,9 +817,9 @@ spec:
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
for: 10m
labels:
severity: critical
@ -837,9 +829,9 @@ spec:
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
for: 10m
labels:
severity: warning
@ -861,6 +853,200 @@ spec:
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels:
severity: critical
- name: prometheus
rules:
- alert: PrometheusBadConfig
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
reload its configuration.
summary: Failed Prometheus configuration reload.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"}[5m]) == 0
for: 10m
labels:
severity: critical
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
is running full.
summary: Prometheus alert notification queue predicted to run full in less
than 30m.
expr: |
# Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30)
>
min_over_time(prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}[5m])
)
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
summary: Prometheus has encountered more than 1% errors sending alerts to
a specific Alertmanager.
expr: |
(
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 1
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without(alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
to any Alertmanagers.
summary: Prometheus is not connected to any Alertmanagers.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"}[5m]) < 1
for: 10m
labels:
severity: warning
- alert: PrometheusTSDBReloadsFailing
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
{{$value | humanize}} reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk.
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
{{$value | humanize}} compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks.
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusTSDBWALCorruptions
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
{{$value | humanize}} corruptions of the write-ahead log (WAL) over the
last 3h.
summary: Prometheus is detecting WAL corruptions.
expr: |
increase(prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
samples.
summary: Prometheus is not ingesting samples.
expr: |
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
for: 10m
labels:
severity: warning
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
{{$value | humanize}} samples/s with different values but duplicated timestamp.
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
{{$value | humanize}} samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 10m
labels:
severity: warning
- alert: PrometheusRemoteStorageFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
{{ printf "%.1f" $value }}% of the samples to queue {{$labels.queue}}.
summary: Prometheus fails to send samples to remote storage.
expr: |
(
rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
(
rate(prometheus_remote_storage_failed_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+
rate(prometheus_remote_storage_succeeded_samples_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
)
* 100
> 1
for: 15m
labels:
severity: critical
- alert: PrometheusRemoteWriteBehind
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
is {{ printf "%.1f" $value }}s behind for queue {{$labels.queue}}.
summary: Prometheus remote write is behind.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- on(job, instance) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
)
> 120
for: 15m
labels:
severity: critical
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations.
expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: critical
- alert: PrometheusMissingRuleEvaluations
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
printf "%.0f" $value }} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
@ -915,8 +1101,8 @@ spec:
rules:
- alert: NodeDiskRunningFull
annotations:
message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace
}}/{{ $labels.pod }} will be full within the next 24 hours.
message: Device {{ $labels.device }} on node {{ $labels.instance }} will be
full within the next 24 hours.
expr: |
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
for: 30m
@ -924,8 +1110,8 @@ spec:
severity: warning
- alert: NodeDiskRunningFull
annotations:
message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace
}}/{{ $labels.pod }} will be full within the next 2 hours.
message: Device {{ $labels.device }} on node {{ $labels.instance }} will be
full within the next 2 hours.
expr: |
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
for: 10m
@ -938,7 +1124,7 @@ spec:
message: Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod
}}. Ensure NTP is configured correctly on this host.
expr: |
abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03
abs(node_timex_offset_seconds{job="node-exporter"}) > 0.05
for: 2m
labels:
severity: warning
@ -971,107 +1157,6 @@ spec:
for: 2m
labels:
severity: warning
- name: prometheus.rules
rules:
- alert: PrometheusConfigReloadFailed
annotations:
description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
summary: Reloading Prometheus' configuration failed
expr: |
prometheus_config_last_reload_successful{job="prometheus-k8s",namespace="monitoring"} == 0
for: 10m
labels:
severity: warning
- alert: PrometheusNotificationQueueRunningFull
annotations:
description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
$labels.pod}}
summary: Prometheus' alert notification queue is running full
expr: |
predict_linear(prometheus_notifications_queue_length{job="prometheus-k8s",namespace="monitoring"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="prometheus-k8s",namespace="monitoring"}
for: 10m
labels:
severity: warning
- alert: PrometheusErrorSendingAlerts
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
summary: Errors while sending alert from Prometheus
expr: |
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.01
for: 10m
labels:
severity: warning
- alert: PrometheusErrorSendingAlerts
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
summary: Errors while sending alerts from Prometheus
expr: |
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) / rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0.03
for: 10m
labels:
severity: critical
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
to any Alertmanagers
summary: Prometheus is not connected to any Alertmanagers
expr: |
prometheus_notifications_alertmanagers_discovered{job="prometheus-k8s",namespace="monitoring"} < 1
for: 10m
labels:
severity: warning
- alert: PrometheusTSDBReloadsFailing
annotations:
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
reload failures over the last four hours.'
summary: Prometheus has issues reloading data blocks from disk
expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0
for: 12h
labels:
severity: warning
- alert: PrometheusTSDBCompactionsFailing
annotations:
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
compaction failures over the last four hours.'
summary: Prometheus has issues compacting sample blocks
expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[2h]) > 0
for: 12h
labels:
severity: warning
- alert: PrometheusTSDBWALCorruptions
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
log (WAL).'
summary: Prometheus write-ahead log is corrupted
expr: |
prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
for: 4h
labels:
severity: warning
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting
samples.
summary: Prometheus isn't ingesting samples
expr: |
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
for: 10m
labels:
severity: warning
- alert: PrometheusTargetScrapesDuplicate
annotations:
description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected
due to duplicate timestamps but different values'
summary: Prometheus has many samples rejected
expr: |
increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 10m
labels:
severity: warning
- name: prometheus-operator
rules:
- alert: PrometheusOperatorReconcileErrors

View File

@ -34,5 +34,5 @@ spec:
image: carlosedp/docker-smtp:v1.0.1
name: smtp-server
ports:
- containerPort: 25
name: smtp
- containerPort: 25
name: smtp

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {

View File

@ -8,11 +8,11 @@ export DOCKER_CLI_EXPERIMENTAL=enabled
REPO=carlosedp
AOR_VERSION=2.1
PROM_ADAPTER_VERSION=v0.4.1
KSM_VERSION=v1.5.0
PROM_OP_VERSION=v0.28.0
PROM_ADAPTER_VERSION=v0.5.0
KSM_VERSION=v1.7.2
PROM_OP_VERSION=v0.31.1
KUBE_RBAC_VERSION=v0.4.1
PROM_CONFIG_RELOADER_VERSION=v0.28.0
PROM_CONFIG_RELOADER_VERSION=v0.31.1
CONFIGMAP_RELOAD_VERSION=v0.2.2
#-------------------------------------------------------------------------------
# Kubernetes addon-resizer

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {

View File

@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {