1
0
mirror of https://github.com/juanfont/headscale.git synced 2025-09-16 17:50:44 +02:00

state: ensure netinfo is preserved and not removed

the client will send a lot of fields as `nil` if they have
not changed. NetInfo, which is inside Hostinfo, is one of those
fields and we often would override the whole hostinfo meaning that
we would remove netinfo if it hadnt changed.

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2025-09-08 11:17:27 +02:00 committed by Kristoffer Dalby
parent 233dffc186
commit 476f30ab20
4 changed files with 362 additions and 45 deletions

View File

@ -0,0 +1,50 @@
// Package state provides pure functions for processing MapRequest data.
// These functions are extracted from UpdateNodeFromMapRequest to improve
// testability and maintainability.
package state
import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/rs/zerolog/log"
"tailscale.com/tailcfg"
)
// NetInfoFromMapRequest determines the correct NetInfo to use.
// Returns the NetInfo that should be used for this request.
func NetInfoFromMapRequest(
nodeID types.NodeID,
currentHostinfo *tailcfg.Hostinfo,
reqHostinfo *tailcfg.Hostinfo,
) *tailcfg.NetInfo {
// If request has NetInfo, use it
if reqHostinfo != nil && reqHostinfo.NetInfo != nil {
return reqHostinfo.NetInfo
}
// Otherwise, use current NetInfo if available
if currentHostinfo != nil && currentHostinfo.NetInfo != nil {
log.Debug().
Caller().
Uint64("node.id", nodeID.Uint64()).
Int("preferredDERP", currentHostinfo.NetInfo.PreferredDERP).
Msg("using NetInfo from previous Hostinfo in MapRequest")
return currentHostinfo.NetInfo
}
// No NetInfo available anywhere - log for debugging
var hostname string
if reqHostinfo != nil {
hostname = reqHostinfo.Hostname
} else if currentHostinfo != nil {
hostname = currentHostinfo.Hostname
}
log.Debug().
Caller().
Uint64("node.id", nodeID.Uint64()).
Str("node.hostname", hostname).
Msg("node sent update but has no NetInfo in request or database")
return nil
}

View File

@ -0,0 +1,134 @@
package state
import (
"net/netip"
"testing"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
)
func TestNetInfoFromMapRequest(t *testing.T) {
nodeID := types.NodeID(1)
tests := []struct {
name string
currentHostinfo *tailcfg.Hostinfo
reqHostinfo *tailcfg.Hostinfo
expectNetInfo *tailcfg.NetInfo
}{
{
name: "no current NetInfo - return nil",
currentHostinfo: nil,
reqHostinfo: &tailcfg.Hostinfo{
Hostname: "test-node",
},
expectNetInfo: nil,
},
{
name: "current has NetInfo, request has NetInfo - use request",
currentHostinfo: &tailcfg.Hostinfo{
NetInfo: &tailcfg.NetInfo{PreferredDERP: 1},
},
reqHostinfo: &tailcfg.Hostinfo{
Hostname: "test-node",
NetInfo: &tailcfg.NetInfo{PreferredDERP: 2},
},
expectNetInfo: &tailcfg.NetInfo{PreferredDERP: 2},
},
{
name: "current has NetInfo, request has no NetInfo - use current",
currentHostinfo: &tailcfg.Hostinfo{
NetInfo: &tailcfg.NetInfo{PreferredDERP: 3},
},
reqHostinfo: &tailcfg.Hostinfo{
Hostname: "test-node",
},
expectNetInfo: &tailcfg.NetInfo{PreferredDERP: 3},
},
{
name: "current has NetInfo, no request Hostinfo - use current",
currentHostinfo: &tailcfg.Hostinfo{
NetInfo: &tailcfg.NetInfo{PreferredDERP: 4},
},
reqHostinfo: nil,
expectNetInfo: &tailcfg.NetInfo{PreferredDERP: 4},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := NetInfoFromMapRequest(nodeID, tt.currentHostinfo, tt.reqHostinfo)
if tt.expectNetInfo == nil {
assert.Nil(t, result, "expected nil NetInfo")
} else {
require.NotNil(t, result, "expected non-nil NetInfo")
assert.Equal(t, tt.expectNetInfo.PreferredDERP, result.PreferredDERP, "DERP mismatch")
}
})
}
}
func TestNetInfoPreservationInRegistrationFlow(t *testing.T) {
nodeID := types.NodeID(1)
// This test reproduces the bug in registration flows where NetInfo was lost
// because we used the wrong hostinfo reference when calling NetInfoFromMapRequest
t.Run("registration_flow_bug_reproduction", func(t *testing.T) {
// Simulate existing node with NetInfo (before re-registration)
existingNodeHostinfo := &tailcfg.Hostinfo{
Hostname: "test-node",
NetInfo: &tailcfg.NetInfo{PreferredDERP: 5},
}
// Simulate new registration request (no NetInfo)
newRegistrationHostinfo := &tailcfg.Hostinfo{
Hostname: "test-node",
OS: "linux",
// NetInfo is nil - this is what comes from the registration request
}
// Simulate what was happening in the bug: we passed the "current node being modified"
// hostinfo (which has no NetInfo) instead of the existing node's hostinfo
nodeBeingModifiedHostinfo := &tailcfg.Hostinfo{
Hostname: "test-node",
// NetInfo is nil because this node is being modified/reset
}
// BUG: Using the node being modified (no NetInfo) instead of existing node (has NetInfo)
buggyResult := NetInfoFromMapRequest(nodeID, nodeBeingModifiedHostinfo, newRegistrationHostinfo)
assert.Nil(t, buggyResult, "Bug: Should return nil when using wrong hostinfo reference")
// CORRECT: Using the existing node's hostinfo (has NetInfo)
correctResult := NetInfoFromMapRequest(nodeID, existingNodeHostinfo, newRegistrationHostinfo)
assert.NotNil(t, correctResult, "Fix: Should preserve NetInfo when using correct hostinfo reference")
assert.Equal(t, 5, correctResult.PreferredDERP, "Should preserve the DERP region from existing node")
})
}
// Simple helper function for tests
func createTestNodeSimple(id types.NodeID) *types.Node {
user := types.User{
Name: "test-user",
}
machineKey := key.NewMachine()
nodeKey := key.NewNode()
node := &types.Node{
ID: id,
Hostname: "test-node",
UserID: uint(id),
User: user,
MachineKey: machineKey.Public(),
NodeKey: nodeKey.Public(),
IPv4: &netip.Addr{},
IPv6: &netip.Addr{},
}
return node
}

View File

@ -852,10 +852,25 @@ func (s *State) BackfillNodeIPs() ([]string, error) {
} }
for _, node := range nodes { for _, node := range nodes {
// Preserve online status when refreshing from database // Preserve online status and NetInfo when refreshing from database
existingNode, exists := s.nodeStore.GetNode(node.ID) existingNode, exists := s.nodeStore.GetNode(node.ID)
if exists && existingNode.Valid() { if exists && existingNode.Valid() {
node.IsOnline = ptr.To(existingNode.IsOnline().Get()) node.IsOnline = ptr.To(existingNode.IsOnline().Get())
// TODO(kradalby): We should ensure we use the same hostinfo and node merge semantics
// when a node re-registers as we do when it sends a map request (UpdateNodeFromMapRequest).
// Preserve NetInfo from existing node to prevent loss during backfill
netInfo := NetInfoFromMapRequest(node.ID, existingNode.AsStruct().Hostinfo, node.Hostinfo)
if netInfo != nil {
if node.Hostinfo != nil {
hostinfoCopy := *node.Hostinfo
hostinfoCopy.NetInfo = netInfo
node.Hostinfo = &hostinfoCopy
} else {
node.Hostinfo = &tailcfg.Hostinfo{NetInfo: netInfo}
}
}
} }
// TODO(kradalby): This should just update the IP addresses, nothing else in the node store. // TODO(kradalby): This should just update the IP addresses, nothing else in the node store.
// We should avoid PutNode here. // We should avoid PutNode here.
@ -1166,7 +1181,24 @@ func (s *State) HandleNodeFromAuthPath(
node.NodeKey = nodeToRegister.NodeKey node.NodeKey = nodeToRegister.NodeKey
node.DiscoKey = nodeToRegister.DiscoKey node.DiscoKey = nodeToRegister.DiscoKey
node.Hostname = nodeToRegister.Hostname node.Hostname = nodeToRegister.Hostname
node.Hostinfo = nodeToRegister.Hostinfo
// TODO(kradalby): We should ensure we use the same hostinfo and node merge semantics
// when a node re-registers as we do when it sends a map request (UpdateNodeFromMapRequest).
// Preserve NetInfo from existing node when re-registering
netInfo := NetInfoFromMapRequest(existingMachineNode.ID, existingMachineNode.Hostinfo, nodeToRegister.Hostinfo)
if netInfo != nil {
if nodeToRegister.Hostinfo != nil {
hostinfoCopy := *nodeToRegister.Hostinfo
hostinfoCopy.NetInfo = netInfo
node.Hostinfo = &hostinfoCopy
} else {
node.Hostinfo = &tailcfg.Hostinfo{NetInfo: netInfo}
}
} else {
node.Hostinfo = nodeToRegister.Hostinfo
}
node.Endpoints = nodeToRegister.Endpoints node.Endpoints = nodeToRegister.Endpoints
node.RegisterMethod = nodeToRegister.RegisterMethod node.RegisterMethod = nodeToRegister.RegisterMethod
if expiry != nil { if expiry != nil {
@ -1333,7 +1365,24 @@ func (s *State) HandleNodeFromPreAuthKey(
s.nodeStore.UpdateNode(existingNode.ID, func(node *types.Node) { s.nodeStore.UpdateNode(existingNode.ID, func(node *types.Node) {
node.NodeKey = nodeToRegister.NodeKey node.NodeKey = nodeToRegister.NodeKey
node.Hostname = nodeToRegister.Hostname node.Hostname = nodeToRegister.Hostname
node.Hostinfo = nodeToRegister.Hostinfo
// TODO(kradalby): We should ensure we use the same hostinfo and node merge semantics
// when a node re-registers as we do when it sends a map request (UpdateNodeFromMapRequest).
// Preserve NetInfo from existing node when re-registering
netInfo := NetInfoFromMapRequest(existingNode.ID, existingNode.Hostinfo, nodeToRegister.Hostinfo)
if netInfo != nil {
if nodeToRegister.Hostinfo != nil {
hostinfoCopy := *nodeToRegister.Hostinfo
hostinfoCopy.NetInfo = netInfo
node.Hostinfo = &hostinfoCopy
} else {
node.Hostinfo = &tailcfg.Hostinfo{NetInfo: netInfo}
}
} else {
node.Hostinfo = nodeToRegister.Hostinfo
}
node.Endpoints = nodeToRegister.Endpoints node.Endpoints = nodeToRegister.Endpoints
node.RegisterMethod = nodeToRegister.RegisterMethod node.RegisterMethod = nodeToRegister.RegisterMethod
node.ForcedTags = nodeToRegister.ForcedTags node.ForcedTags = nodeToRegister.ForcedTags
@ -1527,6 +1576,12 @@ func (s *State) autoApproveNodes() ([]change.ChangeSet, error) {
// - node.ApplyPeerChange // - node.ApplyPeerChange
// - logTracePeerChange in poll.go. // - logTracePeerChange in poll.go.
func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest) (change.ChangeSet, error) { func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest) (change.ChangeSet, error) {
log.Trace().
Caller().
Uint64("node.id", id.Uint64()).
Interface("request", req).
Msg("Processing MapRequest for node")
var routeChange bool var routeChange bool
var hostinfoChanged bool var hostinfoChanged bool
var needsRouteApproval bool var needsRouteApproval bool
@ -1536,6 +1591,27 @@ func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest
peerChange := currentNode.PeerChangeFromMapRequest(req) peerChange := currentNode.PeerChangeFromMapRequest(req)
hostinfoChanged = !hostinfoEqual(currentNode.View(), req.Hostinfo) hostinfoChanged = !hostinfoEqual(currentNode.View(), req.Hostinfo)
// Get the correct NetInfo to use
netInfo := NetInfoFromMapRequest(id, currentNode.Hostinfo, req.Hostinfo)
// Apply NetInfo to request Hostinfo
if req.Hostinfo != nil {
if netInfo != nil {
// Create a copy to avoid modifying the original
hostinfoCopy := *req.Hostinfo
hostinfoCopy.NetInfo = netInfo
req.Hostinfo = &hostinfoCopy
}
} else if netInfo != nil {
// Create minimal Hostinfo with NetInfo
req.Hostinfo = &tailcfg.Hostinfo{
NetInfo: netInfo,
}
}
// Re-check hostinfoChanged after potential NetInfo preservation
hostinfoChanged = !hostinfoEqual(currentNode.View(), req.Hostinfo)
// If there is no changes and nothing to save, // If there is no changes and nothing to save,
// return early. // return early.
if peerChangeEmpty(peerChange) && !hostinfoChanged { if peerChangeEmpty(peerChange) && !hostinfoChanged {
@ -1544,31 +1620,43 @@ func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest
// Calculate route approval before NodeStore update to avoid calling View() inside callback // Calculate route approval before NodeStore update to avoid calling View() inside callback
var autoApprovedRoutes []netip.Prefix var autoApprovedRoutes []netip.Prefix
hasNewRoutes := req.Hostinfo != nil && len(req.Hostinfo.RoutableIPs) > 0 var hasNewRoutes bool
if hi := req.Hostinfo; hi != nil {
hasNewRoutes = len(hi.RoutableIPs) > 0
}
needsRouteApproval = hostinfoChanged && (routesChanged(currentNode.View(), req.Hostinfo) || (hasNewRoutes && len(currentNode.ApprovedRoutes) == 0)) needsRouteApproval = hostinfoChanged && (routesChanged(currentNode.View(), req.Hostinfo) || (hasNewRoutes && len(currentNode.ApprovedRoutes) == 0))
if needsRouteApproval { if needsRouteApproval {
autoApprovedRoutes, routeChange = policy.ApproveRoutesWithPolicy( // Extract announced routes from request
s.polMan, var announcedRoutes []netip.Prefix
currentNode.View(), if req.Hostinfo != nil {
// We need to preserve currently approved routes to ensure announcedRoutes = req.Hostinfo.RoutableIPs
// routes outside of the policy approver is persisted. }
currentNode.ApprovedRoutes,
// However, the node has updated its routable IPs, so we // Apply policy-based auto-approval if routes are announced
// need to approve them using that as a context. if len(announcedRoutes) > 0 {
req.Hostinfo.RoutableIPs, autoApprovedRoutes, routeChange = policy.ApproveRoutesWithPolicy(
) s.polMan,
currentNode.View(),
currentNode.ApprovedRoutes,
announcedRoutes,
)
}
} }
// Log when routes change but approval doesn't // Log when routes change but approval doesn't
if hostinfoChanged && req.Hostinfo != nil && routesChanged(currentNode.View(), req.Hostinfo) && !routeChange { if hostinfoChanged && !routeChange {
log.Debug(). if hi := req.Hostinfo; hi != nil {
Caller(). if routesChanged(currentNode.View(), hi) {
Uint64("node.id", id.Uint64()). log.Debug().
Strs("oldAnnouncedRoutes", util.PrefixesToString(currentNode.AnnouncedRoutes())). Caller().
Strs("newAnnouncedRoutes", util.PrefixesToString(req.Hostinfo.RoutableIPs)). Uint64("node.id", id.Uint64()).
Strs("approvedRoutes", util.PrefixesToString(currentNode.ApprovedRoutes)). Strs("oldAnnouncedRoutes", util.PrefixesToString(currentNode.AnnouncedRoutes())).
Bool("routeChange", routeChange). Strs("newAnnouncedRoutes", util.PrefixesToString(hi.RoutableIPs)).
Msg("announced routes changed but approved routes did not") Strs("approvedRoutes", util.PrefixesToString(currentNode.ApprovedRoutes)).
Bool("routeChange", routeChange).
Msg("announced routes changed but approved routes did not")
}
}
} }
currentNode.ApplyPeerChange(&peerChange) currentNode.ApplyPeerChange(&peerChange)
@ -1581,27 +1669,7 @@ func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest
// https://github.com/tailscale/tailscale/commit/e1011f138737286ecf5123ff887a7a5800d129a2 // https://github.com/tailscale/tailscale/commit/e1011f138737286ecf5123ff887a7a5800d129a2
// TODO(kradalby): evaluate if we need better comparing of hostinfo // TODO(kradalby): evaluate if we need better comparing of hostinfo
// before we take the changes. // before we take the changes.
// Preserve NetInfo only if the existing node actually has valid NetInfo // NetInfo preservation has already been handled above before early return check
// This prevents copying nil NetInfo which would lose DERP relay assignments
if req.Hostinfo != nil && req.Hostinfo.NetInfo == nil && currentNode.Hostinfo != nil && currentNode.Hostinfo.NetInfo != nil {
log.Debug().
Caller().
Uint64("node.id", id.Uint64()).
Int("preferredDERP", currentNode.Hostinfo.NetInfo.PreferredDERP).
Msg("preserving NetInfo from previous Hostinfo in MapRequest")
req.Hostinfo.NetInfo = currentNode.Hostinfo.NetInfo
} else if req.Hostinfo == nil && currentNode.Hostinfo != nil && currentNode.Hostinfo.NetInfo != nil {
// When MapRequest has no Hostinfo but we have existing NetInfo, create a minimal
// Hostinfo to preserve the NetInfo to maintain DERP connectivity
log.Debug().
Caller().
Uint64("node.id", id.Uint64()).
Int("preferredDERP", currentNode.Hostinfo.NetInfo.PreferredDERP).
Msg("creating minimal Hostinfo to preserve NetInfo in MapRequest")
req.Hostinfo = &tailcfg.Hostinfo{
NetInfo: currentNode.Hostinfo.NetInfo,
}
}
currentNode.Hostinfo = req.Hostinfo currentNode.Hostinfo = req.Hostinfo
currentNode.ApplyHostnameFromHostInfo(req.Hostinfo) currentNode.ApplyHostnameFromHostInfo(req.Hostinfo)
@ -1630,7 +1698,12 @@ func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest
// 2. The announced routes changed (even if approved routes stayed the same) // 2. The announced routes changed (even if approved routes stayed the same)
// This is because SubnetRoutes is the intersection of announced AND approved routes. // This is because SubnetRoutes is the intersection of announced AND approved routes.
needsRouteUpdate := false needsRouteUpdate := false
routesChangedButNotApproved := hostinfoChanged && req.Hostinfo != nil && needsRouteApproval && !routeChange var routesChangedButNotApproved bool
if hostinfoChanged && needsRouteApproval && !routeChange {
if hi := req.Hostinfo; hi != nil {
routesChangedButNotApproved = true
}
}
if routeChange { if routeChange {
needsRouteUpdate = true needsRouteUpdate = true
log.Debug(). log.Debug().

View File

@ -66,6 +66,9 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
} }
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected", 30*time.Second) requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected", 30*time.Second)
// Validate that all nodes have NetInfo and DERP servers before logout
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP before logout", 1*time.Minute)
// assertClientsState(t, allClients) // assertClientsState(t, allClients)
clientIPs := make(map[TailscaleClient][]netip.Addr) clientIPs := make(map[TailscaleClient][]netip.Addr)
@ -149,6 +152,9 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to batcher", 120*time.Second) requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected to batcher", 120*time.Second)
// Validate that all nodes have NetInfo and DERP servers after reconnection
requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after reconnection", 1*time.Minute)
err = scenario.WaitForTailscaleSync() err = scenario.WaitForTailscaleSync()
assertNoErrSync(t, err) assertNoErrSync(t, err)
@ -191,6 +197,60 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
} }
} }
// requireAllClientsNetInfoAndDERP validates that all nodes have NetInfo in the database
// and a valid DERP server based on the NetInfo. This function follows the pattern of
// requireAllClientsOnline by using hsic.DebugNodeStore to get the database state.
func requireAllClientsNetInfoAndDERP(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, timeout time.Duration) {
t.Helper()
startTime := time.Now()
t.Logf("requireAllClientsNetInfoAndDERP: Starting validation at %s - %s", startTime.Format(TimestampFormat), message)
require.EventuallyWithT(t, func(c *assert.CollectT) {
// Get nodestore state
nodeStore, err := headscale.DebugNodeStore()
assert.NoError(c, err, "Failed to get nodestore debug info")
if err != nil {
return
}
// Validate node counts first
expectedCount := len(expectedNodes)
assert.Equal(c, expectedCount, len(nodeStore), "NodeStore total nodes mismatch")
// Check each expected node
for _, nodeID := range expectedNodes {
node, exists := nodeStore[nodeID]
assert.True(c, exists, "Node %d not found in nodestore", nodeID)
if !exists {
continue
}
// Validate that the node has Hostinfo
assert.NotNil(c, node.Hostinfo, "Node %d (%s) should have Hostinfo", nodeID, node.Hostname)
if node.Hostinfo == nil {
continue
}
// Validate that the node has NetInfo
assert.NotNil(c, node.Hostinfo.NetInfo, "Node %d (%s) should have NetInfo in Hostinfo", nodeID, node.Hostname)
if node.Hostinfo.NetInfo == nil {
continue
}
// Validate that the node has a valid DERP server (PreferredDERP should be > 0)
preferredDERP := node.Hostinfo.NetInfo.PreferredDERP
assert.Greater(c, preferredDERP, 0, "Node %d (%s) should have a valid DERP server (PreferredDERP > 0), got %d", nodeID, node.Hostname, preferredDERP)
t.Logf("Node %d (%s) has valid NetInfo with DERP server %d", nodeID, node.Hostname, preferredDERP)
}
}, timeout, 2*time.Second, message)
endTime := time.Now()
duration := endTime.Sub(startTime)
t.Logf("requireAllClientsNetInfoAndDERP: Completed validation at %s - Duration: %v - %s", endTime.Format(TimestampFormat), duration, message)
}
func assertLastSeenSet(t *testing.T, node *v1.Node) { func assertLastSeenSet(t *testing.T, node *v1.Node) {
assert.NotNil(t, node) assert.NotNil(t, node)
assert.NotNil(t, node.GetLastSeen()) assert.NotNil(t, node.GetLastSeen())