1
0
mirror of https://github.com/juanfont/headscale.git synced 2025-09-02 13:47:00 +02:00

fix(state): resolve NodeStore synchronization race conditions

Fixes CLI timeout issues in TestNodeMoveCommand by ensuring NodeStore
remains the single source of truth for all node operations.

- Fix updateNodeTx to only return errors, not stale database NodeViews
- All callers now read updated NodeView from NodeStore after DB updates
- Add nil check for IsOnline in node.Proto() to prevent panics
- Add TODO comments to validate NodeStore read patterns

This eliminates race conditions where database reads could return
partially initialized nodes with nil IsOnline pointers, causing
gRPC server panics and CLI command timeouts.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kristoffer Dalby 2025-08-19 10:14:43 +00:00
parent 3a92b14c1a
commit 9b9f481c9c
2 changed files with 47 additions and 15 deletions

View File

@ -340,11 +340,12 @@ func (s *State) ListAllUsers() ([]types.User, error) {
// IMPORTANT: This function does NOT update the NodeStore. The caller MUST update the NodeStore // IMPORTANT: This function does NOT update the NodeStore. The caller MUST update the NodeStore
// BEFORE calling this function with the EXACT same changes that the database update will make. // BEFORE calling this function with the EXACT same changes that the database update will make.
// This ensures the NodeStore is the source of truth for the batcher and maintains consistency. // This ensures the NodeStore is the source of truth for the batcher and maintains consistency.
func (s *State) updateNodeTx(nodeID types.NodeID, updateFn func(tx *gorm.DB) error) (types.NodeView, error) { // Returns error only; callers should get the updated NodeView from NodeStore to maintain consistency.
func (s *State) updateNodeTx(nodeID types.NodeID, updateFn func(tx *gorm.DB) error) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
node, err := hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) { _, err := hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) {
if err := updateFn(tx); err != nil { if err := updateFn(tx); err != nil {
return nil, err return nil, err
} }
@ -360,11 +361,7 @@ func (s *State) updateNodeTx(nodeID types.NodeID, updateFn func(tx *gorm.DB) err
return node, nil return node, nil
}) })
if err != nil { return err
return types.NodeView{}, err
}
return node.View(), nil
} }
// persistNodeToDB saves the current state of a node from NodeStore to the database. // persistNodeToDB saves the current state of a node from NodeStore to the database.
@ -446,7 +443,7 @@ func (s *State) Connect(id types.NodeID) change.ChangeSet {
// Also persist the last seen time to the database // Also persist the last seen time to the database
// Note: IsOnline is managed only in NodeStore (marked with gorm:"-"), not persisted to database // Note: IsOnline is managed only in NodeStore (marked with gorm:"-"), not persisted to database
_, err := s.updateNodeTx(id, func(tx *gorm.DB) error { err := s.updateNodeTx(id, func(tx *gorm.DB) error {
// Update last_seen in the database // Update last_seen in the database
return hsdb.SetLastSeen(tx, id, now) return hsdb.SetLastSeen(tx, id, now)
}) })
@ -486,7 +483,7 @@ func (s *State) Disconnect(id types.NodeID) (change.ChangeSet, error) {
n.IsOnline = ptr.To(false) n.IsOnline = ptr.To(false)
}) })
_, err := s.updateNodeTx(id, func(tx *gorm.DB) error { err := s.updateNodeTx(id, func(tx *gorm.DB) error {
// Update last_seen in the database // Update last_seen in the database
// Note: IsOnline is managed only in NodeStore (marked with gorm:"-"), not persisted to database // Note: IsOnline is managed only in NodeStore (marked with gorm:"-"), not persisted to database
return hsdb.SetLastSeen(tx, id, now) return hsdb.SetLastSeen(tx, id, now)
@ -623,13 +620,20 @@ func (s *State) SetNodeExpiry(nodeID types.NodeID, expiry time.Time) (types.Node
node.Expiry = &expiryPtr node.Expiry = &expiryPtr
}) })
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error { err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
return hsdb.NodeSetExpiry(tx, nodeID, expiry) return hsdb.NodeSetExpiry(tx, nodeID, expiry)
}) })
if err != nil { if err != nil {
return types.NodeView{}, change.EmptySet, fmt.Errorf("setting node expiry: %w", err) return types.NodeView{}, change.EmptySet, fmt.Errorf("setting node expiry: %w", err)
} }
// Get the updated node from NodeStore to ensure consistency
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
n, found := s.GetNodeByID(nodeID)
if !found {
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
}
// Check if policy manager needs updating // Check if policy manager needs updating
c, err := s.updatePolicyManagerNodes() c, err := s.updatePolicyManagerNodes()
if err != nil { if err != nil {
@ -652,13 +656,20 @@ func (s *State) SetNodeTags(nodeID types.NodeID, tags []string) (types.NodeView,
node.ForcedTags = tags node.ForcedTags = tags
}) })
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error { err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
return hsdb.SetTags(tx, nodeID, tags) return hsdb.SetTags(tx, nodeID, tags)
}) })
if err != nil { if err != nil {
return types.NodeView{}, change.EmptySet, fmt.Errorf("setting node tags: %w", err) return types.NodeView{}, change.EmptySet, fmt.Errorf("setting node tags: %w", err)
} }
// Get the updated node from NodeStore to ensure consistency
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
n, found := s.GetNodeByID(nodeID)
if !found {
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
}
// Check if policy manager needs updating // Check if policy manager needs updating
c, err := s.updatePolicyManagerNodes() c, err := s.updatePolicyManagerNodes()
if err != nil { if err != nil {
@ -681,13 +692,20 @@ func (s *State) SetApprovedRoutes(nodeID types.NodeID, routes []netip.Prefix) (t
node.ApprovedRoutes = routes node.ApprovedRoutes = routes
}) })
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error { err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
return hsdb.SetApprovedRoutes(tx, nodeID, routes) return hsdb.SetApprovedRoutes(tx, nodeID, routes)
}) })
if err != nil { if err != nil {
return types.NodeView{}, change.EmptySet, fmt.Errorf("setting approved routes: %w", err) return types.NodeView{}, change.EmptySet, fmt.Errorf("setting approved routes: %w", err)
} }
// Get the updated node from NodeStore to ensure consistency
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
n, found := s.GetNodeByID(nodeID)
if !found {
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
}
// Check if policy manager needs updating // Check if policy manager needs updating
c, err := s.updatePolicyManagerNodes() c, err := s.updatePolicyManagerNodes()
if err != nil { if err != nil {
@ -735,13 +753,20 @@ func (s *State) RenameNode(nodeID types.NodeID, newName string) (types.NodeView,
node.GivenName = newName node.GivenName = newName
}) })
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error { err = s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
return hsdb.RenameNode(tx, nodeID, newName) return hsdb.RenameNode(tx, nodeID, newName)
}) })
if err != nil { if err != nil {
return types.NodeView{}, change.EmptySet, fmt.Errorf("renaming node: %w", err) return types.NodeView{}, change.EmptySet, fmt.Errorf("renaming node: %w", err)
} }
// Get the updated node from NodeStore to ensure consistency
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
n, found := s.GetNodeByID(nodeID)
if !found {
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
}
// Check if policy manager needs updating // Check if policy manager needs updating
c, err := s.updatePolicyManagerNodes() c, err := s.updatePolicyManagerNodes()
if err != nil { if err != nil {
@ -776,13 +801,20 @@ func (s *State) AssignNodeToUser(nodeID types.NodeID, userID types.UserID) (type
n.UserID = uint(userID) n.UserID = uint(userID)
}) })
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error { err = s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
return hsdb.AssignNodeToUser(tx, nodeID, userID) return hsdb.AssignNodeToUser(tx, nodeID, userID)
}) })
if err != nil { if err != nil {
return types.NodeView{}, change.EmptySet, err return types.NodeView{}, change.EmptySet, err
} }
// Get the updated node from NodeStore to ensure consistency
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
n, found := s.GetNodeByID(nodeID)
if !found {
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
}
// Check if policy manager needs updating // Check if policy manager needs updating
c, err := s.updatePolicyManagerNodes() c, err := s.updatePolicyManagerNodes()
if err != nil { if err != nil {

View File

@ -356,7 +356,7 @@ func (node *Node) Proto() *v1.Node {
GivenName: node.GivenName, GivenName: node.GivenName,
User: node.User.Proto(), User: node.User.Proto(),
ForcedTags: node.ForcedTags, ForcedTags: node.ForcedTags,
Online: *node.IsOnline, Online: node.IsOnline != nil && *node.IsOnline,
// Only ApprovedRoutes and AvailableRoutes is set here. SubnetRoutes has // Only ApprovedRoutes and AvailableRoutes is set here. SubnetRoutes has
// to be populated manually with PrimaryRoute, to ensure it includes the // to be populated manually with PrimaryRoute, to ensure it includes the