mirror of https://github.com/juanfont/headscale.git synced 2025-09-25 17:51:11 +02:00

nodestore: tests

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
Kristoffer Dalby 2025-09-22 13:55:52 +02:00
parent 8442220b80
commit 61c4a20f78


@@ -1,7 +1,11 @@
package state
import (
"context"
"fmt"
"net/netip"
"runtime"
"sync"
"testing"
"time"
@@ -835,3 +839,302 @@ type testStep struct {
name string
action func(store *NodeStore)
}
// --- Additional NodeStore concurrency, batching, race, resource, timeout, and allocation tests ---
// Helper for concurrent test nodes
func createConcurrentTestNode(id types.NodeID, hostname string) types.Node {
machineKey := key.NewMachine()
nodeKey := key.NewNode()
return types.Node{
ID: id,
Hostname: hostname,
MachineKey: machineKey.Public(),
NodeKey: nodeKey.Public(),
UserID: 1,
User: types.User{
Name: "concurrent-test-user",
},
}
}
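// Each call generates fresh machine and node keys, so concurrently inserted
// nodes only collide on node IDs when a test reuses an ID deliberately.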
// --- Concurrency: concurrent PutNode operations ---
func TestNodeStoreConcurrentPutNode(t *testing.T) {
const concurrentOps = 20
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
defer store.Stop()
var wg sync.WaitGroup
results := make(chan bool, concurrentOps)
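// Every goroutine writes a distinct node ID and reports success through the
// buffered channel, so the main goroutine can count results after wg.Wait.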
for i := 0; i < concurrentOps; i++ {
wg.Add(1)
go func(nodeID int) {
defer wg.Done()
node := createConcurrentTestNode(types.NodeID(nodeID), "concurrent-node")
resultNode := store.PutNode(node)
results <- resultNode.Valid()
}(i + 1)
}
wg.Wait()
close(results)
successCount := 0
for success := range results {
if success {
successCount++
}
}
require.Equal(t, concurrentOps, successCount, "All concurrent PutNode operations should succeed")
}
// --- Batching: concurrent ops fit in one batch ---
func TestNodeStoreBatchingEfficiency(t *testing.T) {
const batchSize = 10
const ops = 15 // more than batchSize
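// batchSize is this test's assumption about the store's internal write-batching
// limit; ops deliberately exceeds it so the writes cannot all land in one batch.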
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
defer store.Stop()
var wg sync.WaitGroup
results := make(chan bool, ops)
for i := 0; i < ops; i++ {
wg.Add(1)
go func(nodeID int) {
defer wg.Done()
node := createConcurrentTestNode(types.NodeID(nodeID), "batch-node")
resultNode := store.PutNode(node)
results <- resultNode.Valid()
}(i + 1)
}
wg.Wait()
close(results)
successCount := 0
for success := range results {
if success {
successCount++
}
}
require.Equal(t, ops, successCount, "All batch PutNode operations should succeed")
}
// --- Race conditions: many goroutines on same node ---
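// Most valuable when run with the race detector enabled (go test -race), which
// turns any unsynchronized access inside NodeStore into a test failure.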
func TestNodeStoreRaceConditions(t *testing.T) {
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
defer store.Stop()
nodeID := types.NodeID(1)
node := createConcurrentTestNode(nodeID, "race-node")
resultNode := store.PutNode(node)
require.True(t, resultNode.Valid())
const numGoroutines = 30
const opsPerGoroutine = 10
var wg sync.WaitGroup
errors := make(chan error, numGoroutines*opsPerGoroutine)
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(gid int) {
defer wg.Done()
for j := 0; j < opsPerGoroutine; j++ {
switch j % 3 {
case 0:
resultNode, _ := store.UpdateNode(nodeID, func(n *types.Node) {
n.Hostname = "race-updated"
})
if !resultNode.Valid() {
errors <- fmt.Errorf("UpdateNode failed in goroutine %d, op %d", gid, j)
}
case 1:
retrieved, found := store.GetNode(nodeID)
if !found || !retrieved.Valid() {
errors <- fmt.Errorf("GetNode failed in goroutine %d, op %d", gid, j)
}
case 2:
newNode := createConcurrentTestNode(nodeID, "race-put")
resultNode := store.PutNode(newNode)
if !resultNode.Valid() {
errors <- fmt.Errorf("PutNode failed in goroutine %d, op %d", gid, j)
}
}
}
}(i)
}
wg.Wait()
close(errors)
errorCount := 0
for err := range errors {
t.Error(err)
errorCount++
}
if errorCount > 0 {
t.Fatalf("Race condition test failed with %d errors", errorCount)
}
}
// --- Resource cleanup: goroutine leak detection ---
func TestNodeStoreResourceCleanup(t *testing.T) {
// initialGoroutines := runtime.NumGoroutine()
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
defer store.Stop()
time.Sleep(50 * time.Millisecond)
afterStartGoroutines := runtime.NumGoroutine()
const ops = 100
for i := 0; i < ops; i++ {
nodeID := types.NodeID(i + 1)
node := createConcurrentTestNode(nodeID, "cleanup-node")
resultNode := store.PutNode(node)
assert.True(t, resultNode.Valid())
store.UpdateNode(nodeID, func(n *types.Node) {
n.Hostname = "cleanup-updated"
})
retrieved, found := store.GetNode(nodeID)
assert.True(t, found && retrieved.Valid())
if i%10 == 9 {
store.DeleteNode(nodeID)
}
}
runtime.GC()
time.Sleep(100 * time.Millisecond)
finalGoroutines := runtime.NumGoroutine()
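// Allow a small amount of slack: the runtime and the test framework can hold a
// couple of transient goroutines of their own.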
if finalGoroutines > afterStartGoroutines+2 {
t.Errorf("Potential goroutine leak: started with %d, ended with %d", afterStartGoroutines, finalGoroutines)
}
}
// --- Timeout/deadlock: operations complete within reasonable time ---
func TestNodeStoreOperationTimeout(t *testing.T) {
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
defer store.Stop()
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()
const ops = 30
var wg sync.WaitGroup
putResults := make([]error, ops)
updateResults := make([]error, ops)
// Launch all PutNode operations concurrently
for i := 1; i <= ops; i++ {
nodeID := types.NodeID(i)
wg.Add(1)
go func(idx int, id types.NodeID) {
defer wg.Done()
startPut := time.Now()
fmt.Printf("[TestNodeStoreOperationTimeout] %s: PutNode(%d) starting\n", startPut.Format("15:04:05.000"), id)
node := createConcurrentTestNode(id, "timeout-node")
resultNode := store.PutNode(node)
endPut := time.Now()
fmt.Printf("[TestNodeStoreOperationTimeout] %s: PutNode(%d) finished, valid=%v, duration=%v\n", endPut.Format("15:04:05.000"), id, resultNode.Valid(), endPut.Sub(startPut))
if !resultNode.Valid() {
putResults[idx-1] = fmt.Errorf("PutNode failed for node %d", id)
}
}(i, nodeID)
}
wg.Wait()
// Launch all UpdateNode operations concurrently
wg = sync.WaitGroup{}
for i := 1; i <= ops; i++ {
nodeID := types.NodeID(i)
wg.Add(1)
go func(idx int, id types.NodeID) {
defer wg.Done()
startUpdate := time.Now()
fmt.Printf("[TestNodeStoreOperationTimeout] %s: UpdateNode(%d) starting\n", startUpdate.Format("15:04:05.000"), id)
resultNode, ok := store.UpdateNode(id, func(n *types.Node) {
n.Hostname = "timeout-updated"
})
endUpdate := time.Now()
fmt.Printf("[TestNodeStoreOperationTimeout] %s: UpdateNode(%d) finished, valid=%v, ok=%v, duration=%v\n", endUpdate.Format("15:04:05.000"), id, resultNode.Valid(), ok, endUpdate.Sub(startUpdate))
if !ok || !resultNode.Valid() {
updateResults[idx-1] = fmt.Errorf("UpdateNode failed for node %d", id)
}
}(i, nodeID)
}
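// Race the WaitGroup against the context deadline: done is closed once all
// goroutines finish, and the select below decides whether that happened in time.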
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
errorCount := 0
for _, err := range putResults {
if err != nil {
t.Error(err)
errorCount++
}
}
for _, err := range updateResults {
if err != nil {
t.Error(err)
errorCount++
}
}
if errorCount == 0 {
t.Log("All concurrent operations completed successfully within timeout")
} else {
t.Fatalf("Some concurrent operations failed: %d errors", errorCount)
}
case <-ctx.Done():
fmt.Println("[TestNodeStoreOperationTimeout] Timeout reached, test failed")
t.Fatal("Operations timed out - potential deadlock or resource issue")
}
}
// --- Edge case: update non-existent node ---
func TestNodeStoreUpdateNonExistentNode(t *testing.T) {
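// Each iteration uses a fresh store, so earlier iterations cannot mask a
// failure in the miss path.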
for i := 0; i < 10; i++ {
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
nonExistentID := types.NodeID(999 + i)
updateCallCount := 0
fmt.Printf("[TestNodeStoreUpdateNonExistentNode] UpdateNode(%d) starting\n", nonExistentID)
resultNode, ok := store.UpdateNode(nonExistentID, func(n *types.Node) {
updateCallCount++
n.Hostname = "should-never-be-called"
})
fmt.Printf("[TestNodeStoreUpdateNonExistentNode] UpdateNode(%d) finished, valid=%v, ok=%v, updateCallCount=%d\n", nonExistentID, resultNode.Valid(), ok, updateCallCount)
assert.False(t, ok, "UpdateNode should return false for non-existent node")
assert.False(t, resultNode.Valid(), "UpdateNode should return invalid node for non-existent node")
assert.Equal(t, 0, updateCallCount, "UpdateFn should not be called for non-existent node")
store.Stop()
}
}
// --- Allocation benchmark ---
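// Typically run as `go test -bench=BenchmarkNodeStoreAllocations -benchmem` so
// allocation counts are reported alongside timings.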
func BenchmarkNodeStoreAllocations(b *testing.B) {
store := NewNodeStore(nil, allowAllPeersFunc)
store.Start()
defer store.Stop()
b.ResetTimer()
for i := 0; i < b.N; i++ {
nodeID := types.NodeID(i + 1)
node := createConcurrentTestNode(nodeID, "bench-node")
store.PutNode(node)
store.UpdateNode(nodeID, func(n *types.Node) {
n.Hostname = "bench-updated"
})
store.GetNode(nodeID)
if i%10 == 9 {
store.DeleteNode(nodeID)
}
}
}
func TestNodeStoreAllocationStats(t *testing.T) {
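// testing.Benchmark runs the benchmark in-process and returns its
// BenchmarkResult, which makes the allocation count visible in ordinary
// `go test` output.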
res := testing.Benchmark(BenchmarkNodeStoreAllocations)
allocs := res.AllocsPerOp()
t.Logf("NodeStore allocations per op: %d", allocs)
}