From a380ad8bd2329de54386f4176ff8a72c498fed2c Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Wed, 5 Nov 2025 15:38:42 +0100 Subject: [PATCH] mapper: add generation metrics Signed-off-by: Kristoffer Dalby --- hscontrol/mapper/batcher.go | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/hscontrol/mapper/batcher.go b/hscontrol/mapper/batcher.go index f1bec009..e808df7e 100644 --- a/hscontrol/mapper/batcher.go +++ b/hscontrol/mapper/batcher.go @@ -8,12 +8,22 @@ import ( "github.com/juanfont/headscale/hscontrol/state" "github.com/juanfont/headscale/hscontrol/types" "github.com/juanfont/headscale/hscontrol/types/change" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/puzpuzpuz/xsync/v4" "github.com/rs/zerolog/log" "tailscale.com/tailcfg" "tailscale.com/types/ptr" ) +var ( + mapResponseGenerated = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "headscale", + Name: "mapresponse_generated_total", + Help: "total count of mapresponses generated by response type and change type", + }, []string{"response_type", "change_type"}) +) + type batcherFunc func(cfg *types.Config, state *state.State) Batcher // Batcher defines the common interface for all batcher implementations. @@ -75,21 +85,32 @@ func generateMapResponse(nodeID types.NodeID, version tailcfg.CapabilityVersion, } var ( - mapResp *tailcfg.MapResponse - err error + mapResp *tailcfg.MapResponse + err error + responseType string ) + // Record metric when function exits + defer func() { + if err == nil && mapResp != nil && responseType != "" { + mapResponseGenerated.WithLabelValues(responseType, c.Change.String()).Inc() + } + }() + switch c.Change { case change.DERP: + responseType = "derp" mapResp, err = mapper.derpMapResponse(nodeID) case change.NodeCameOnline, change.NodeWentOffline: if c.IsSubnetRouter { // TODO(kradalby): This can potentially be a peer update of the old and new subnet router. + responseType = "full" mapResp, err = mapper.fullMapResponse(nodeID, version) } else { // Trust the change type for online/offline status to avoid race conditions // between NodeStore updates and change processing + responseType = "patch" onlineStatus := c.Change == change.NodeCameOnline mapResp, err = mapper.peerChangedPatchResponse(nodeID, []*tailcfg.PeerChange{ @@ -105,21 +126,26 @@ func generateMapResponse(nodeID types.NodeID, version tailcfg.CapabilityVersion, // to ensure the node sees changes to its own properties (e.g., hostname/DNS name changes) // without losing its view of peer status during rapid reconnection cycles if c.IsSelfUpdate(nodeID) { + responseType = "self" mapResp, err = mapper.selfMapResponse(nodeID, version) } else { + responseType = "change" mapResp, err = mapper.peerChangeResponse(nodeID, version, c.NodeID) } case change.NodeRemove: + responseType = "remove" mapResp, err = mapper.peerRemovedResponse(nodeID, c.NodeID) case change.NodeKeyExpiry: // If the node is the one whose key is expiring, we send a "full" self update // as nodes will ignore patch updates about themselves (?). if c.IsSelfUpdate(nodeID) { + responseType = "self" mapResp, err = mapper.selfMapResponse(nodeID, version) // mapResp, err = mapper.fullMapResponse(nodeID, version) } else { + responseType = "patch" mapResp, err = mapper.peerChangedPatchResponse(nodeID, []*tailcfg.PeerChange{ { NodeID: c.NodeID.NodeID(), @@ -155,6 +181,7 @@ func generateMapResponse(nodeID types.NodeID, version tailcfg.CapabilityVersion, default: // The following will always hit this: // change.Full, change.Policy + responseType = "full" mapResp, err = mapper.fullMapResponse(nodeID, version) }