From f7e635e3f13577d7d060f3e3985e269c013c14b8 Mon Sep 17 00:00:00 2001
From: Laur IVAN
Date: Wed, 25 Feb 2026 11:56:36 +0100
Subject: [PATCH] talos: tune kube-apiserver audit policy to reduce CPU overhead

Add targeted audit policy rules that suppress high-frequency, low-value
requests which were generating ~570k audit events per 10 hours and
causing kube-apiserver to consume 260-316m CPU per node.

Suppressed categories (no security impact):
- coordination.k8s.io/leases: controller/node heartbeats (86k GET + 46k PUT/10h)
- /healthz*, /readyz*, /livez*, /openapi*, /version: probe & discovery endpoints
- system:nodes user group: kubelet get/update/patch on nodes and nodes/status
  only; every other request made with node credentials stays audited
- endpoints + endpointslices GET/LIST/WATCH: Cilium/CoreDNS polling

All other requests continue to be logged at Metadata level.

Result: 76% of audit events suppressed, non-leader apiserver CPU dropped
~50-60% (316m -> 125m on standby nodes).
Note: these figures were measured with a blanket system:nodes exemption;
re-measure with the narrowed rule, which keeps more kubelet traffic audited.

Policy lives in the patch file so it survives cluster resets via
talhelper genconfig.
---
 talos/patches/controller/cluster.yaml | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/talos/patches/controller/cluster.yaml b/talos/patches/controller/cluster.yaml
index 10d6b68..73bd0d2 100644
--- a/talos/patches/controller/cluster.yaml
+++ b/talos/patches/controller/cluster.yaml
@@ -10,6 +10,47 @@ cluster:
       # Enable MutatingAdmissionPolicy feature gate (beta in K8s 1.35)
       feature-gates: MutatingAdmissionPolicy=true
       runtime-config: admissionregistration.k8s.io/v1beta1=true
+    auditPolicy:
+      apiVersion: audit.k8s.io/v1
+      kind: Policy
+      rules:
+        # Don't log lease heartbeats — these are high-frequency controller/node
+        # keepalives that generate the bulk of audit volume with no security value.
+        - level: None
+          resources:
+            - group: "coordination.k8s.io"
+              resources: ["leases"]
+        # Don't log health/readiness/liveness probes or OpenAPI discovery.
+        # These are polled every few seconds by kubelets and Flux controllers.
+        - level: None
+          nonResourceURLs:
+            - "/healthz*"
+            - "/readyz*"
+            - "/livez*"
+            - "/openapi*"
+            - "/version"
+        # Don't log kubelets reading or updating Node objects and their status
+        # (high-frequency node status sync). Scoped by verb and resource so that
+        # every other request made with node credentials (secrets, pods, tokens,
+        # CSRs) is still audited by the catch-all rule at the end of this list.
+        - level: None
+          userGroups: ["system:nodes"]
+          verbs: ["get", "update", "patch"]
+          resources:
+            - group: ""
+              resources: ["nodes", "nodes/status"]
+        # Don't log get/list/watch on endpoints & endpointslices — these are
+        # polled constantly by kube-proxy replacement (Cilium) and coredns.
+        - level: None
+          verbs: ["get", "list", "watch"]
+          resources:
+            - group: ""
+              resources: ["endpoints"]
+            - group: "discovery.k8s.io"
+              resources: ["endpointslices"]
+        # Log everything else at Metadata level (request metadata only: user,
+        # verb, resource, timestamp; no request or response bodies).
+        - level: Metadata
   controllerManager:
     extraArgs:
       bind-address: 0.0.0.0