mirror of
https://github.com/juanfont/headscale.git
synced 2025-08-24 13:46:53 +02:00
fix(state): resolve NodeStore synchronization race conditions
Fixes CLI timeout issues in TestNodeMoveCommand by ensuring NodeStore remains the single source of truth for all node operations. - Fix updateNodeTx to only return errors, not stale database NodeViews - All callers now read updated NodeView from NodeStore after DB updates - Add nil check for IsOnline in node.Proto() to prevent panics - Add TODO comments to validate NodeStore read patterns This eliminates race conditions where database reads could return partially initialized nodes with nil IsOnline pointers, causing gRPC server panics and CLI command timeouts. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
3a92b14c1a
commit
9b9f481c9c
@ -340,11 +340,12 @@ func (s *State) ListAllUsers() ([]types.User, error) {
|
||||
// IMPORTANT: This function does NOT update the NodeStore. The caller MUST update the NodeStore
|
||||
// BEFORE calling this function with the EXACT same changes that the database update will make.
|
||||
// This ensures the NodeStore is the source of truth for the batcher and maintains consistency.
|
||||
func (s *State) updateNodeTx(nodeID types.NodeID, updateFn func(tx *gorm.DB) error) (types.NodeView, error) {
|
||||
// Returns error only; callers should get the updated NodeView from NodeStore to maintain consistency.
|
||||
func (s *State) updateNodeTx(nodeID types.NodeID, updateFn func(tx *gorm.DB) error) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
node, err := hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) {
|
||||
_, err := hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) {
|
||||
if err := updateFn(tx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -360,11 +361,7 @@ func (s *State) updateNodeTx(nodeID types.NodeID, updateFn func(tx *gorm.DB) err
|
||||
|
||||
return node, nil
|
||||
})
|
||||
if err != nil {
|
||||
return types.NodeView{}, err
|
||||
}
|
||||
|
||||
return node.View(), nil
|
||||
return err
|
||||
}
|
||||
|
||||
// persistNodeToDB saves the current state of a node from NodeStore to the database.
|
||||
@ -446,7 +443,7 @@ func (s *State) Connect(id types.NodeID) change.ChangeSet {
|
||||
|
||||
// Also persist the last seen time to the database
|
||||
// Note: IsOnline is managed only in NodeStore (marked with gorm:"-"), not persisted to database
|
||||
_, err := s.updateNodeTx(id, func(tx *gorm.DB) error {
|
||||
err := s.updateNodeTx(id, func(tx *gorm.DB) error {
|
||||
// Update last_seen in the database
|
||||
return hsdb.SetLastSeen(tx, id, now)
|
||||
})
|
||||
@ -486,7 +483,7 @@ func (s *State) Disconnect(id types.NodeID) (change.ChangeSet, error) {
|
||||
n.IsOnline = ptr.To(false)
|
||||
})
|
||||
|
||||
_, err := s.updateNodeTx(id, func(tx *gorm.DB) error {
|
||||
err := s.updateNodeTx(id, func(tx *gorm.DB) error {
|
||||
// Update last_seen in the database
|
||||
// Note: IsOnline is managed only in NodeStore (marked with gorm:"-"), not persisted to database
|
||||
return hsdb.SetLastSeen(tx, id, now)
|
||||
@ -623,13 +620,20 @@ func (s *State) SetNodeExpiry(nodeID types.NodeID, expiry time.Time) (types.Node
|
||||
node.Expiry = &expiryPtr
|
||||
})
|
||||
|
||||
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
return hsdb.NodeSetExpiry(tx, nodeID, expiry)
|
||||
})
|
||||
if err != nil {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("setting node expiry: %w", err)
|
||||
}
|
||||
|
||||
// Get the updated node from NodeStore to ensure consistency
|
||||
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
|
||||
n, found := s.GetNodeByID(nodeID)
|
||||
if !found {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
|
||||
}
|
||||
|
||||
// Check if policy manager needs updating
|
||||
c, err := s.updatePolicyManagerNodes()
|
||||
if err != nil {
|
||||
@ -652,13 +656,20 @@ func (s *State) SetNodeTags(nodeID types.NodeID, tags []string) (types.NodeView,
|
||||
node.ForcedTags = tags
|
||||
})
|
||||
|
||||
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
return hsdb.SetTags(tx, nodeID, tags)
|
||||
})
|
||||
if err != nil {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("setting node tags: %w", err)
|
||||
}
|
||||
|
||||
// Get the updated node from NodeStore to ensure consistency
|
||||
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
|
||||
n, found := s.GetNodeByID(nodeID)
|
||||
if !found {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
|
||||
}
|
||||
|
||||
// Check if policy manager needs updating
|
||||
c, err := s.updatePolicyManagerNodes()
|
||||
if err != nil {
|
||||
@ -681,13 +692,20 @@ func (s *State) SetApprovedRoutes(nodeID types.NodeID, routes []netip.Prefix) (t
|
||||
node.ApprovedRoutes = routes
|
||||
})
|
||||
|
||||
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
return hsdb.SetApprovedRoutes(tx, nodeID, routes)
|
||||
})
|
||||
if err != nil {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("setting approved routes: %w", err)
|
||||
}
|
||||
|
||||
// Get the updated node from NodeStore to ensure consistency
|
||||
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
|
||||
n, found := s.GetNodeByID(nodeID)
|
||||
if !found {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
|
||||
}
|
||||
|
||||
// Check if policy manager needs updating
|
||||
c, err := s.updatePolicyManagerNodes()
|
||||
if err != nil {
|
||||
@ -735,13 +753,20 @@ func (s *State) RenameNode(nodeID types.NodeID, newName string) (types.NodeView,
|
||||
node.GivenName = newName
|
||||
})
|
||||
|
||||
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
err = s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
return hsdb.RenameNode(tx, nodeID, newName)
|
||||
})
|
||||
if err != nil {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("renaming node: %w", err)
|
||||
}
|
||||
|
||||
// Get the updated node from NodeStore to ensure consistency
|
||||
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
|
||||
n, found := s.GetNodeByID(nodeID)
|
||||
if !found {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
|
||||
}
|
||||
|
||||
// Check if policy manager needs updating
|
||||
c, err := s.updatePolicyManagerNodes()
|
||||
if err != nil {
|
||||
@ -776,13 +801,20 @@ func (s *State) AssignNodeToUser(nodeID types.NodeID, userID types.UserID) (type
|
||||
n.UserID = uint(userID)
|
||||
})
|
||||
|
||||
n, err := s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
err = s.updateNodeTx(nodeID, func(tx *gorm.DB) error {
|
||||
return hsdb.AssignNodeToUser(tx, nodeID, userID)
|
||||
})
|
||||
if err != nil {
|
||||
return types.NodeView{}, change.EmptySet, err
|
||||
}
|
||||
|
||||
// Get the updated node from NodeStore to ensure consistency
|
||||
// TODO(kradalby): Validate if this NodeStore read makes sense after database update
|
||||
n, found := s.GetNodeByID(nodeID)
|
||||
if !found {
|
||||
return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID)
|
||||
}
|
||||
|
||||
// Check if policy manager needs updating
|
||||
c, err := s.updatePolicyManagerNodes()
|
||||
if err != nil {
|
||||
|
@ -356,7 +356,7 @@ func (node *Node) Proto() *v1.Node {
|
||||
GivenName: node.GivenName,
|
||||
User: node.User.Proto(),
|
||||
ForcedTags: node.ForcedTags,
|
||||
Online: *node.IsOnline,
|
||||
Online: node.IsOnline != nil && *node.IsOnline,
|
||||
|
||||
// Only ApprovedRoutes and AvailableRoutes is set here. SubnetRoutes has
|
||||
// to be populated manually with PrimaryRoute, to ensure it includes the
|
||||
|
Loading…
Reference in New Issue
Block a user