mirror of
				https://github.com/carlosedp/cluster-monitoring.git
				synced 2025-10-26 10:23:04 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			145 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			145 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
local utils = import '../utils.libsonnet';
 | 
						|
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
 | 
						|
 | 
						|
{
 | 
						|
  // Settings read by the fields below. `+::` merges them into whatever
  // `_config` the including object already defines.
  _config+:: {
    // Namespace used for the exporter Deployment and Service, and passed to
    // both ServiceMonitors.
    namespace: 'monitoring',
    // Replica count handed to the exporter Deployment.
    replicas: 1,

    // Exporter image: the deployment joins these as '<repo>:<version>'.
    versions+:: { elasticExporter: '1.0.2' },
    imageRepos+:: { elasticExporter: 'justwatch/elasticsearch_exporter' },

    // Appends 'logging' to the inherited Prometheus namespace list.
    // NOTE(review): presumably this is what lets Prometheus scrape targets in
    // the 'logging' namespace -- confirm against the base library.
    prometheus+:: { namespaces+:: ['logging'] },

    // Extra Grafana dashboards, keyed by dashboard file name.
    grafanaDashboards+:: {
      'elasticsearch-dashboard.json': import '../grafana-dashboards/elasticsearch-dashboard.json',
      'fluentd-dashboard.json': import '../grafana-dashboards/fluentd-dashboard.json',
    },
  },
 | 
						|
 | 
						|
  elasticExporter+:: {
 | 
						|
    // Deployment running a single 'elasticsearch-exporter' container that
    // serves metrics on the named port 'es-metrics'.
    deployment:
      local deploy = k.apps.v1.deployment;
      local ctr = deploy.mixin.spec.template.spec.containersType;
      local ctrPort = ctr.portsType;

      local labels = { 'k8s-app': 'elasticsearch-exporter' };
      local metricsPort = 9108;
      local image = $._config.imageRepos.elasticExporter + ':' + $._config.versions.elasticExporter;

      // Capabilities removed from the container's security context.
      local droppedCaps = ['SETPCAP', 'MKNOD', 'AUDIT_WRITE', 'CHOWN', 'NET_RAW', 'DAC_OVERRIDE', 'FOWNER', 'FSETID', 'KILL', 'SETGID', 'SETUID', 'NET_BIND_SERVICE', 'SYS_CHROOT', 'SETFCAP'];

      // Liveness and readiness share one definition: HTTP GET /health on the
      // metrics port, 30s initial delay, 10s timeout. `probe` is either
      // ctr.mixin.livenessProbe or ctr.mixin.readinessProbe.
      local healthCheck(probe) =
        probe.httpGet.withPath('/health') +
        probe.httpGet.withPort(metricsPort) +
        probe.withInitialDelaySeconds(30) +
        probe.withTimeoutSeconds(10);

      local exporter =
        ctr.new('elasticsearch-exporter', image) +
        ctr.withCommand([
          '/bin/elasticsearch_exporter',
          '-es.uri=http://elasticsearch.logging.svc:9200',
          '-es.timeout=60s',
          '-es.all=true',
        ]) +
        ctr.withPorts(ctrPort.newNamed(metricsPort, 'es-metrics')) +
        ctr.mixin.securityContext.capabilities.withDrop(droppedCaps) +
        ctr.mixin.securityContext.withRunAsNonRoot(true) +
        ctr.mixin.securityContext.withRunAsUser(1000) +
        // Read-only root filesystem is intentionally left disabled:
        // ctr.mixin.securityContext.withReadOnlyRootFilesystem(true) +
        ctr.mixin.resources.withRequests({ memory: '64Mi', cpu: '25m' }) +
        ctr.mixin.resources.withLimits({ memory: '128Mi', cpu: '100m' }) +
        healthCheck(ctr.mixin.livenessProbe) +
        healthCheck(ctr.mixin.readinessProbe);

      // The same label set is used for the pod template, the Deployment
      // metadata and the selector.
      deploy.new('elasticsearch-exporter', $._config.replicas, [exporter], labels) +
      deploy.mixin.metadata.withNamespace($._config.namespace) +
      deploy.mixin.metadata.withLabels(labels) +
      deploy.mixin.spec.selector.withMatchLabels(labels) +
      // Rolling update with maxSurge 1 and maxUnavailable 0.
      deploy.mixin.spec.strategy.withType('RollingUpdate') +
      deploy.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) +
      deploy.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) +
      deploy.mixin.spec.template.spec.withRestartPolicy('Always'),
 | 
						|
 | 
						|
    // Service in front of the exporter pods. The selector is read back from
    // the Deployment's matchLabels rather than repeated here.
    service:
      local svc = k.core.v1.service;
      local svcPort = svc.mixin.spec.portsType;
      local selector = $.elasticExporter.deployment.spec.selector.matchLabels;
      // Port entry built from the name 'es-metrics' and port number 9108.
      local ports = svcPort.newNamed('es-metrics', 9108, 'es-metrics');

      svc.new('elasticsearch-exporter', selector, ports) +
      svc.mixin.metadata.withNamespace($._config.namespace) +
      svc.mixin.metadata.withLabels({ 'k8s-app': 'elasticsearch-exporter' }),
 | 
						|
 | 
						|
    // ServiceMonitor for the exporter Service (label 'k8s-app':
    // 'elasticsearch-exporter', port 'es-metrics', scheme 'http').
    // The fourth argument follows $._config.namespace instead of a literal
    // 'monitoring', because the exporter Service is created in
    // $._config.namespace; with the default config the result is unchanged.
    // NOTE(review): assumes the fourth parameter of utils.newServiceMonitor is
    // the namespace searched for matching Services -- confirm in
    // ../utils.libsonnet.
    serviceMonitorElastic:
      utils.newServiceMonitor(
        'elasticsearch',
        $._config.namespace,
        { 'k8s-app': 'elasticsearch-exporter' },
        $._config.namespace,
        'es-metrics',
        'http'
      ),
 | 
						|
 | 
						|
    // ServiceMonitor named 'fluentd-es', built with the shared helper from
    // ../utils.libsonnet. Arguments after the name: $._config.namespace, the
    // 'k8s-app': 'fluentd-es' label set, 'logging', the port name 'metrics'
    // and the scheme 'http'.
    // NOTE(review): 'logging' is presumably the namespace where the fluentd
    // Service lives -- confirm against utils.newServiceMonitor.
    serviceMonitorFluentd:
      local fluentdLabels = { 'k8s-app': 'fluentd-es' };
      utils.newServiceMonitor('fluentd-es', $._config.namespace, fluentdLabels, 'logging', 'metrics', 'http'),
 | 
						|
  },
 | 
						|
  // Prometheus recording and alerting rules for Elasticsearch, appended to
  // the inherited rule groups.
  prometheusRules+:: {
    // Recording rule: stores `expression` under the series name `name`.
    local recorded(name, expression) = { record: name, expr: expression },

    // Alerting rule with severity 'critical'. `duration` is the 'for' window;
    // `text` and `title` become the message and summary annotations.
    local criticalAlert(name, expression, duration, text, title) = {
      alert: name,
      expr: expression,
      'for': duration,
      annotations: { message: text, summary: title },
      labels: { severity: 'critical' },
    },

    groups+: [
      {
        name: 'elasticsearch-k8s-rules',
        rules: [
          // Used share of the data filesystem, in percent.
          recorded(
            'elasticsearch_filesystem_data_used_percent',
            '100 * (elasticsearch_filesystem_data_size_bytes - elasticsearch_filesystem_data_free_bytes) / elasticsearch_filesystem_data_size_bytes'
          ),
          // Free share, derived from the recorded used percentage.
          recorded(
            'elasticsearch_filesystem_data_free_percent',
            '100 - elasticsearch_filesystem_data_used_percent'
          ),

          // Fires when fewer than 3 nodes have been reported for 5 minutes.
          criticalAlert(
            'ElasticsearchTooFewNodesRunning',
            'elasticsearch_cluster_health_number_of_nodes < 3',
            '5m',
            'There are only {{$value}} < 3 ElasticSearch nodes running',
            'ElasticSearch running on less than 3 nodes'
          ),
          // Fires when heap usage stays above 90% of the maximum for 15 minutes.
          criticalAlert(
            'ElasticsearchHeapTooHigh',
            'elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} > 0.9',
            '15m',
            'The heap usage is over 90% for 15m',
            'ElasticSearch node {{$labels.node}} heap usage is high'
          ),
        ],
      },
    ],
  },
 | 
						|
}
 |