This PR addresses consistency issues that were introduced or discovered with the nodestore.

nodestore: now returns the node that was put or updated once the operation finishes. This closes a race condition where reading the node back did not necessarily reflect the given change, and it ensures we pick up all other updates from the same batch write.

auth: authentication paths have been unified and simplified, removing a number of bad branches and ensuring we only do the minimal work. A comprehensive auth test set has been created so we no longer need integration tests to validate auth, and it has allowed us to generate test cases for all the branches we currently know of.

integration: added more tooling and checks to validate that nodes reach the expected state when they come up and go down, and standardised behaviour between the different auth models. Much of this exists to support or detect issues in the changes to the nodestore (races) and auth (inconsistencies after login and reaching the correct state).

This PR was assisted, particularly on the tests, by Claude Code.
package integration

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"net/netip"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cenkalti/backoff/v5"
	"github.com/google/go-cmp/cmp"
	v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
	policyv2 "github.com/juanfont/headscale/hscontrol/policy/v2"
	"github.com/juanfont/headscale/hscontrol/types"
	"github.com/juanfont/headscale/hscontrol/util"
	"github.com/juanfont/headscale/integration/integrationutil"
	"github.com/juanfont/headscale/integration/tsic"
	"github.com/oauth2-proxy/mockoidc"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/maps"
	"golang.org/x/exp/slices"
	"tailscale.com/tailcfg"
	"tailscale.com/types/ptr"
)

const (
	// derpPingTimeout defines the timeout for individual DERP ping operations
	// Used in DERP connectivity tests to verify relay server communication.
	derpPingTimeout = 2 * time.Second

	// derpPingCount defines the number of ping attempts for DERP connectivity tests
	// Higher count provides better reliability assessment of DERP connectivity.
	derpPingCount = 10

	// TimestampFormat is the standard timestamp format used across all integration tests
	// Format: "2006-01-02T15-04-05.999999999" provides high precision timestamps
	// suitable for debugging and log correlation in integration tests.
	TimestampFormat = "2006-01-02T15-04-05.999999999"

	// TimestampFormatRunID is used for generating unique run identifiers
	// Format: "20060102-150405" provides compact date-time for file/directory names.
	TimestampFormatRunID = "20060102-150405"
)

// NodeSystemStatus represents the status of a node across different systems
type NodeSystemStatus struct {
	Batcher          bool
	BatcherConnCount int
	MapResponses     bool
	NodeStore        bool
}

// requireNotNil validates that an object is not nil and fails the test if it is.
// This helper provides consistent error messaging for nil checks in integration tests.
func requireNotNil(t *testing.T, object interface{}) {
	t.Helper()
	require.NotNil(t, object)
}

// requireNoErrHeadscaleEnv validates that headscale environment creation succeeded.
// Provides specific error context for headscale environment setup failures.
func requireNoErrHeadscaleEnv(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to create headscale environment")
}

// requireNoErrGetHeadscale validates that headscale server retrieval succeeded.
// Provides specific error context for headscale server access failures.
func requireNoErrGetHeadscale(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to get headscale")
}

// requireNoErrListClients validates that client listing operations succeeded.
// Provides specific error context for client enumeration failures.
func requireNoErrListClients(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to list clients")
}

// requireNoErrListClientIPs validates that client IP retrieval succeeded.
// Provides specific error context for client IP address enumeration failures.
func requireNoErrListClientIPs(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to get client IPs")
}

// requireNoErrSync validates that client synchronization operations succeeded.
// Provides specific error context for client sync failures across the network.
func requireNoErrSync(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to have all clients sync up")
}

// requireNoErrListFQDN validates that FQDN listing operations succeeded.
// Provides specific error context for DNS name enumeration failures.
func requireNoErrListFQDN(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to list FQDNs")
}

// requireNoErrLogout validates that tailscale node logout operations succeeded.
// Provides specific error context for client logout failures.
func requireNoErrLogout(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to log out tailscale nodes")
}

// collectExpectedNodeIDs extracts node IDs from a list of TailscaleClients for validation purposes
func collectExpectedNodeIDs(t *testing.T, clients []TailscaleClient) []types.NodeID {
	t.Helper()

	expectedNodes := make([]types.NodeID, 0, len(clients))
	for _, client := range clients {
		status := client.MustStatus()
		nodeID, err := strconv.ParseUint(string(status.Self.ID), 10, 64)
		require.NoError(t, err)
		expectedNodes = append(expectedNodes, types.NodeID(nodeID))
	}
	return expectedNodes
}

// validateInitialConnection performs comprehensive validation after initial client login.
// Validates that all nodes are online and have proper NetInfo/DERP configuration,
// essential for ensuring successful initial connection state in relogin tests.
func validateInitialConnection(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()

	requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", 120*time.Second)
	requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after initial login", 3*time.Minute)
}

// validateLogoutComplete performs comprehensive validation after client logout.
// Ensures all nodes are properly offline across all headscale systems,
// critical for validating clean logout state in relogin tests.
func validateLogoutComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()

	requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", 120*time.Second)
}

// validateReloginComplete performs comprehensive validation after client relogin.
// Validates that all nodes are back online with proper NetInfo/DERP configuration,
// ensuring successful relogin state restoration in integration tests.
func validateReloginComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()

	requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", 120*time.Second)
	requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", 3*time.Minute)
}
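
// The three validate* helpers above are intended to be called in sequence
// around a logout/relogin cycle. A minimal sketch (illustrative only; the
// logout and relogin steps themselves are test-specific and omitted):
//
//	expectedNodes := collectExpectedNodeIDs(t, allClients)
//	validateInitialConnection(t, headscale, expectedNodes)
//	// ... log all clients out ...
//	validateLogoutComplete(t, headscale, expectedNodes)
//	// ... log all clients back in ...
//	validateReloginComplete(t, headscale, expectedNodes)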

// requireAllClientsOnline verifies that all expected nodes are in the specified
// online/offline state across all headscale systems (batcher, map responses, nodestore).
func requireAllClientsOnline(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, expectedOnline bool, message string, timeout time.Duration) {
	t.Helper()

	startTime := time.Now()
	stateStr := "offline"
	if expectedOnline {
		stateStr = "online"
	}
	t.Logf("requireAllClientsOnline: Starting %s validation for %d nodes at %s - %s", stateStr, len(expectedNodes), startTime.Format(TimestampFormat), message)

	if expectedOnline {
		// For online validation, use the existing logic with full timeout
		requireAllClientsOnlineWithSingleTimeout(t, headscale, expectedNodes, expectedOnline, message, timeout)
	} else {
		// For offline validation, use staged approach with component-specific timeouts
		requireAllClientsOfflineStaged(t, headscale, expectedNodes, message, timeout)
	}

	endTime := time.Now()
	t.Logf("requireAllClientsOnline: Completed %s validation for %d nodes at %s - Duration: %s - %s", stateStr, len(expectedNodes), endTime.Format(TimestampFormat), endTime.Sub(startTime), message)
}

// requireAllClientsOnlineWithSingleTimeout is the original validation logic for online state
func requireAllClientsOnlineWithSingleTimeout(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, expectedOnline bool, message string, timeout time.Duration) {
	t.Helper()

	var prevReport string
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		// Get batcher state
		debugInfo, err := headscale.DebugBatcher()
		assert.NoError(c, err, "Failed to get batcher debug info")
		if err != nil {
			return
		}

		// Get map responses
		mapResponses, err := headscale.GetAllMapReponses()
		assert.NoError(c, err, "Failed to get map responses")
		if err != nil {
			return
		}

		// Get nodestore state
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		// Validate that all expected nodes are present in nodeStore
		for _, nodeID := range expectedNodes {
			_, exists := nodeStore[nodeID]
			assert.True(c, exists, "Expected node %d not found in nodeStore", nodeID)
		}

		// Check that we have map responses for expected nodes
		mapResponseCount := len(mapResponses)
		expectedCount := len(expectedNodes)
		assert.GreaterOrEqual(c, mapResponseCount, expectedCount, "MapResponses insufficient - expected at least %d responses, got %d", expectedCount, mapResponseCount)

		// Build status map for each node
		nodeStatus := make(map[types.NodeID]NodeSystemStatus)

		// Initialize all expected nodes
		for _, nodeID := range expectedNodes {
			nodeStatus[nodeID] = NodeSystemStatus{}
		}

		// Check batcher state for expected nodes
		for _, nodeID := range expectedNodes {
			nodeIDStr := fmt.Sprintf("%d", nodeID)
			if nodeInfo, exists := debugInfo.ConnectedNodes[nodeIDStr]; exists {
				if status, exists := nodeStatus[nodeID]; exists {
					status.Batcher = nodeInfo.Connected
					status.BatcherConnCount = nodeInfo.ActiveConnections
					nodeStatus[nodeID] = status
				}
			} else {
				// Node not found in batcher, mark as disconnected
				if status, exists := nodeStatus[nodeID]; exists {
					status.Batcher = false
					status.BatcherConnCount = 0
					nodeStatus[nodeID] = status
				}
			}
		}

		// Check map responses using buildExpectedOnlineMap
		onlineFromMaps := make(map[types.NodeID]bool)
		onlineMap := integrationutil.BuildExpectedOnlineMap(mapResponses)

		// For single node scenarios, we can't validate peer visibility since there are no peers
		if len(expectedNodes) == 1 {
			// For single node, just check that we have map responses for the node
			for nodeID := range nodeStatus {
				if _, exists := onlineMap[nodeID]; exists {
					onlineFromMaps[nodeID] = true
				} else {
					onlineFromMaps[nodeID] = false
				}
			}
		} else {
			// Multi-node scenario: check peer visibility
			for nodeID := range nodeStatus {
				// Initialize as offline - will be set to true only if visible in all relevant peer maps
				onlineFromMaps[nodeID] = false

				// Count how many peer maps should show this node
				expectedPeerMaps := 0
				foundOnlinePeerMaps := 0

				for id, peerMap := range onlineMap {
					if id == nodeID {
						continue // Skip self-references
					}
					expectedPeerMaps++

					if online, exists := peerMap[nodeID]; exists && online {
						foundOnlinePeerMaps++
					}
				}

				// Node is considered online if it appears online in all peer maps
				// (or if there are no peer maps to check)
				if expectedPeerMaps == 0 || foundOnlinePeerMaps == expectedPeerMaps {
					onlineFromMaps[nodeID] = true
				}
			}
		}
		assert.Lenf(c, onlineFromMaps, expectedCount, "MapResponses missing nodes in status check")

		// Update status with map response data
		for nodeID, online := range onlineFromMaps {
			if status, exists := nodeStatus[nodeID]; exists {
				status.MapResponses = online
				nodeStatus[nodeID] = status
			}
		}

		// Check nodestore state for expected nodes
		for _, nodeID := range expectedNodes {
			if node, exists := nodeStore[nodeID]; exists {
				if status, exists := nodeStatus[nodeID]; exists {
					// Check if node is online in nodestore
					status.NodeStore = node.IsOnline != nil && *node.IsOnline
					nodeStatus[nodeID] = status
				}
			}
		}

		// Verify all systems show nodes in expected state and report failures
		allMatch := true
		var failureReport strings.Builder

		ids := types.NodeIDs(maps.Keys(nodeStatus))
		slices.Sort(ids)
		for _, nodeID := range ids {
			status := nodeStatus[nodeID]
			systemsMatch := (status.Batcher == expectedOnline) &&
				(status.MapResponses == expectedOnline) &&
				(status.NodeStore == expectedOnline)

			if !systemsMatch {
				allMatch = false
				stateStr := "offline"
				if expectedOnline {
					stateStr = "online"
				}
				failureReport.WriteString(fmt.Sprintf("node:%d is not fully %s (timestamp: %s):\n", nodeID, stateStr, time.Now().Format(TimestampFormat)))
				failureReport.WriteString(fmt.Sprintf(" - batcher: %t (expected: %t)\n", status.Batcher, expectedOnline))
				failureReport.WriteString(fmt.Sprintf(" - conn count: %d\n", status.BatcherConnCount))
				failureReport.WriteString(fmt.Sprintf(" - mapresponses: %t (expected: %t, down with at least one peer)\n", status.MapResponses, expectedOnline))
				failureReport.WriteString(fmt.Sprintf(" - nodestore: %t (expected: %t)\n", status.NodeStore, expectedOnline))
			}
		}

		if !allMatch {
			if diff := cmp.Diff(prevReport, failureReport.String()); diff != "" {
				t.Logf("Node state validation report changed at %s:", time.Now().Format(TimestampFormat))
				t.Logf("Previous report:\n%s", prevReport)
				t.Logf("Current report:\n%s", failureReport.String())
				t.Logf("Report diff:\n%s", diff)
				prevReport = failureReport.String()
			}

			failureReport.WriteString(fmt.Sprintf("validation_timestamp: %s\n", time.Now().Format(TimestampFormat)))
			// Note: timeout_remaining not available in this context

			assert.Fail(c, failureReport.String())
		}

		stateStr := "offline"
		if expectedOnline {
			stateStr = "online"
		}
		assert.True(c, allMatch, fmt.Sprintf("Not all %d nodes are %s across all systems (batcher, mapresponses, nodestore)", len(expectedNodes), stateStr))
	}, timeout, 2*time.Second, message)
}

// requireAllClientsOfflineStaged validates offline state with staged timeouts for different components
func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, totalTimeout time.Duration) {
	t.Helper()

	// Stage 1: Verify batcher disconnection (should be immediate)
	t.Logf("Stage 1: Verifying batcher disconnection for %d nodes", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		debugInfo, err := headscale.DebugBatcher()
		assert.NoError(c, err, "Failed to get batcher debug info")
		if err != nil {
			return
		}

		allBatcherOffline := true
		for _, nodeID := range expectedNodes {
			nodeIDStr := fmt.Sprintf("%d", nodeID)
			if nodeInfo, exists := debugInfo.ConnectedNodes[nodeIDStr]; exists && nodeInfo.Connected {
				allBatcherOffline = false
				assert.False(c, nodeInfo.Connected, "Node %d should not be connected in batcher", nodeID)
			}
		}
		assert.True(c, allBatcherOffline, "All nodes should be disconnected from batcher")
	}, 15*time.Second, 1*time.Second, "batcher disconnection validation")

	// Stage 2: Verify nodestore offline status (up to 20 seconds due to disconnect detection delay)
	t.Logf("Stage 2: Verifying nodestore offline status for %d nodes (allowing for 10s disconnect detection delay)", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		allNodeStoreOffline := true
		for _, nodeID := range expectedNodes {
			if node, exists := nodeStore[nodeID]; exists {
				isOnline := node.IsOnline != nil && *node.IsOnline
				if isOnline {
					allNodeStoreOffline = false
					assert.False(c, isOnline, "Node %d should be offline in nodestore", nodeID)
				}
			}
		}
		assert.True(c, allNodeStoreOffline, "All nodes should be offline in nodestore")
	}, 20*time.Second, 1*time.Second, "nodestore offline validation")

	// Stage 3: Verify map response propagation (longest delay due to peer update timing)
	t.Logf("Stage 3: Verifying map response propagation for %d nodes (allowing for peer map update delays)", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		mapResponses, err := headscale.GetAllMapReponses()
		assert.NoError(c, err, "Failed to get map responses")
		if err != nil {
			return
		}

		onlineMap := integrationutil.BuildExpectedOnlineMap(mapResponses)
		allMapResponsesOffline := true

		if len(expectedNodes) == 1 {
			// Single node: check if it appears in map responses
			for nodeID := range onlineMap {
				if slices.Contains(expectedNodes, nodeID) {
					allMapResponsesOffline = false
					assert.False(c, true, "Node %d should not appear in map responses", nodeID)
				}
			}
		} else {
			// Multi-node: check peer visibility
			for _, nodeID := range expectedNodes {
				for id, peerMap := range onlineMap {
					if id == nodeID {
						continue // Skip self-references
					}
					if online, exists := peerMap[nodeID]; exists && online {
						allMapResponsesOffline = false
						assert.False(c, online, "Node %d should not be visible in node %d's map response", nodeID, id)
					}
				}
			}
		}
		assert.True(c, allMapResponsesOffline, "All nodes should be absent from peer map responses")
	}, 60*time.Second, 2*time.Second, "map response propagation validation")

	t.Logf("All stages completed: nodes are fully offline across all systems")
}

// requireAllClientsNetInfoAndDERP validates that all nodes have NetInfo in the database
// and a valid DERP server based on the NetInfo. This function follows the pattern of
// requireAllClientsOnline by using hsic.DebugNodeStore to get the database state.
func requireAllClientsNetInfoAndDERP(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, timeout time.Duration) {
	t.Helper()

	startTime := time.Now()
	t.Logf("requireAllClientsNetInfoAndDERP: Starting NetInfo/DERP validation for %d nodes at %s - %s", len(expectedNodes), startTime.Format(TimestampFormat), message)

	require.EventuallyWithT(t, func(c *assert.CollectT) {
		// Get nodestore state
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		// Validate that all expected nodes are present in nodeStore
		for _, nodeID := range expectedNodes {
			_, exists := nodeStore[nodeID]
			assert.True(c, exists, "Expected node %d not found in nodeStore during NetInfo validation", nodeID)
		}

		// Check each expected node
		for _, nodeID := range expectedNodes {
			node, exists := nodeStore[nodeID]
			assert.True(c, exists, "Node %d not found in nodestore during NetInfo validation", nodeID)
			if !exists {
				continue
			}

			// Validate that the node has Hostinfo
			assert.NotNil(c, node.Hostinfo, "Node %d (%s) should have Hostinfo for NetInfo validation", nodeID, node.Hostname)
			if node.Hostinfo == nil {
				t.Logf("Node %d (%s) missing Hostinfo at %s", nodeID, node.Hostname, time.Now().Format(TimestampFormat))
				continue
			}

			// Validate that the node has NetInfo
			assert.NotNil(c, node.Hostinfo.NetInfo, "Node %d (%s) should have NetInfo in Hostinfo for DERP connectivity", nodeID, node.Hostname)
			if node.Hostinfo.NetInfo == nil {
				t.Logf("Node %d (%s) missing NetInfo at %s", nodeID, node.Hostname, time.Now().Format(TimestampFormat))
				continue
			}

			// Validate that the node has a valid DERP server (PreferredDERP should be > 0)
			preferredDERP := node.Hostinfo.NetInfo.PreferredDERP
			assert.Greater(c, preferredDERP, 0, "Node %d (%s) should have a valid DERP server (PreferredDERP > 0) for relay connectivity, got %d", nodeID, node.Hostname, preferredDERP)

			t.Logf("Node %d (%s) has valid NetInfo with DERP server %d at %s", nodeID, node.Hostname, preferredDERP, time.Now().Format(TimestampFormat))
		}
	}, timeout, 5*time.Second, message)

	endTime := time.Now()
	duration := endTime.Sub(startTime)
	t.Logf("requireAllClientsNetInfoAndDERP: Completed NetInfo/DERP validation for %d nodes at %s - Duration: %v - %s", len(expectedNodes), endTime.Format(TimestampFormat), duration, message)
}

// assertLastSeenSet validates that a node has a non-nil LastSeen timestamp.
// Critical for ensuring node activity tracking is functioning properly.
func assertLastSeenSet(t *testing.T, node *v1.Node) {
	assert.NotNil(t, node)
	assert.NotNil(t, node.GetLastSeen())
}

// assertTailscaleNodesLogout verifies that all provided Tailscale clients
// are in the logged-out state (NeedsLogin).
func assertTailscaleNodesLogout(t assert.TestingT, clients []TailscaleClient) {
	if h, ok := t.(interface{ Helper() }); ok {
		h.Helper()
	}

	for _, client := range clients {
		status, err := client.Status()
		assert.NoError(t, err, "failed to get status for client %s", client.Hostname())
		assert.Equal(t, "NeedsLogin", status.BackendState,
			"client %s should be logged out", client.Hostname())
	}
}
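
// assertTailscaleNodesLogout takes assert.TestingT rather than *testing.T so it
// can be used both directly and inside an eventual-consistency loop. A minimal
// sketch (illustrative; "allClients" is a placeholder for the test's clients):
//
//	require.EventuallyWithT(t, func(c *assert.CollectT) {
//		assertTailscaleNodesLogout(c, allClients)
//	}, 30*time.Second, time.Second, "waiting for all clients to report NeedsLogin")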

// pingAllHelper performs ping tests between all clients and addresses, returning success count.
// This is used to validate network connectivity in integration tests.
// Returns the total number of successful ping operations.
func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) int {
	t.Helper()
	success := 0

	for _, client := range clients {
		for _, addr := range addrs {
			err := client.Ping(addr, opts...)
			if err != nil {
				t.Errorf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
			} else {
				success++
			}
		}
	}

	return success
}
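
// Typical usage (illustrative; "allClients" and "allAddrs" are placeholders):
// every client pings every address, so a fully connected mesh yields
// len(clients)*len(addrs) successes.
//
//	success := pingAllHelper(t, allClients, allAddrs)
//	assert.Equal(t, len(allClients)*len(allAddrs), success)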

// pingDerpAllHelper performs DERP-based ping tests between all clients and addresses.
// This specifically tests connectivity through DERP relay servers, which is important
// for validating NAT traversal and relay functionality. Returns success count.
func pingDerpAllHelper(t *testing.T, clients []TailscaleClient, addrs []string) int {
	t.Helper()
	success := 0

	for _, client := range clients {
		for _, addr := range addrs {
			if isSelfClient(client, addr) {
				continue
			}

			err := client.Ping(
				addr,
				tsic.WithPingTimeout(derpPingTimeout),
				tsic.WithPingCount(derpPingCount),
				tsic.WithPingUntilDirect(false),
			)
			if err != nil {
				t.Logf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
			} else {
				success++
			}
		}
	}

	return success
}

// isSelfClient determines if the given address belongs to the client itself.
// Used to avoid self-ping operations in connectivity tests by checking
// hostname and IP address matches.
func isSelfClient(client TailscaleClient, addr string) bool {
	if addr == client.Hostname() {
		return true
	}

	ips, err := client.IPs()
	if err != nil {
		return false
	}

	for _, ip := range ips {
		if ip.String() == addr {
			return true
		}
	}

	return false
}

// assertClientsState validates the status and netmap of a list of clients for general connectivity.
// Runs parallel validation of status, netcheck, and netmap for all clients to ensure
// they have proper network configuration for all-to-all connectivity tests.
func assertClientsState(t *testing.T, clients []TailscaleClient) {
	t.Helper()

	var wg sync.WaitGroup

	for _, client := range clients {
		wg.Add(1)
		c := client // Avoid loop pointer
		go func() {
			defer wg.Done()
			assertValidStatus(t, c)
			assertValidNetcheck(t, c)
			assertValidNetmap(t, c)
		}()
	}

	t.Logf("waiting for client state checks to finish")
	wg.Wait()
}

// assertValidNetmap validates that a client's netmap has all required fields for proper operation.
// Checks self node and all peers for essential networking data including hostinfo, addresses,
// endpoints, and DERP configuration. Skips validation for Tailscale versions below 1.56.
// This test is not suitable for ACL/partial connection tests.
func assertValidNetmap(t *testing.T, client TailscaleClient) {
	t.Helper()

	if !util.TailscaleVersionNewerOrEqual("1.56", client.Version()) {
		t.Logf("%q has version %q, skipping netmap check...", client.Hostname(), client.Version())

		return
	}

	t.Logf("Checking netmap of %q", client.Hostname())

	netmap, err := client.Netmap()
	if err != nil {
		t.Fatalf("getting netmap for %q: %s", client.Hostname(), err)
	}

	assert.Truef(t, netmap.SelfNode.Hostinfo().Valid(), "%q does not have Hostinfo", client.Hostname())
	if hi := netmap.SelfNode.Hostinfo(); hi.Valid() {
		assert.LessOrEqual(t, 1, netmap.SelfNode.Hostinfo().Services().Len(), "%q does not have enough services, got: %v", client.Hostname(), netmap.SelfNode.Hostinfo().Services())
	}

	assert.NotEmptyf(t, netmap.SelfNode.AllowedIPs(), "%q does not have any allowed IPs", client.Hostname())
	assert.NotEmptyf(t, netmap.SelfNode.Addresses(), "%q does not have any addresses", client.Hostname())

	assert.Truef(t, netmap.SelfNode.Online().Get(), "%q is not online", client.Hostname())

	assert.Falsef(t, netmap.SelfNode.Key().IsZero(), "%q does not have a valid NodeKey", client.Hostname())
	assert.Falsef(t, netmap.SelfNode.Machine().IsZero(), "%q does not have a valid MachineKey", client.Hostname())
	assert.Falsef(t, netmap.SelfNode.DiscoKey().IsZero(), "%q does not have a valid DiscoKey", client.Hostname())

	for _, peer := range netmap.Peers {
		assert.NotEqualf(t, "127.3.3.40:0", peer.LegacyDERPString(), "peer (%s) has no home DERP in %q's netmap, got: %s", peer.ComputedName(), client.Hostname(), peer.LegacyDERPString())
		assert.NotEqualf(t, 0, peer.HomeDERP(), "peer (%s) has no home DERP in %q's netmap, got: %d", peer.ComputedName(), client.Hostname(), peer.HomeDERP())

		assert.Truef(t, peer.Hostinfo().Valid(), "peer (%s) of %q does not have Hostinfo", peer.ComputedName(), client.Hostname())
		if hi := peer.Hostinfo(); hi.Valid() {
			assert.LessOrEqualf(t, 3, peer.Hostinfo().Services().Len(), "peer (%s) of %q does not have enough services, got: %v", peer.ComputedName(), client.Hostname(), peer.Hostinfo().Services())

			// Netinfo is not always set
			// assert.Truef(t, hi.NetInfo().Valid(), "peer (%s) of %q does not have NetInfo", peer.ComputedName(), client.Hostname())
			if ni := hi.NetInfo(); ni.Valid() {
				assert.NotEqualf(t, 0, ni.PreferredDERP(), "peer (%s) has no home DERP in %q's netmap, got: %d", peer.ComputedName(), client.Hostname(), peer.Hostinfo().NetInfo().PreferredDERP())
			}
		}

		assert.NotEmptyf(t, peer.Endpoints(), "peer (%s) of %q does not have any endpoints", peer.ComputedName(), client.Hostname())
		assert.NotEmptyf(t, peer.AllowedIPs(), "peer (%s) of %q does not have any allowed IPs", peer.ComputedName(), client.Hostname())
		assert.NotEmptyf(t, peer.Addresses(), "peer (%s) of %q does not have any addresses", peer.ComputedName(), client.Hostname())

		assert.Truef(t, peer.Online().Get(), "peer (%s) of %q is not online", peer.ComputedName(), client.Hostname())

		assert.Falsef(t, peer.Key().IsZero(), "peer (%s) of %q does not have a valid NodeKey", peer.ComputedName(), client.Hostname())
		assert.Falsef(t, peer.Machine().IsZero(), "peer (%s) of %q does not have a valid MachineKey", peer.ComputedName(), client.Hostname())
		assert.Falsef(t, peer.DiscoKey().IsZero(), "peer (%s) of %q does not have a valid DiscoKey", peer.ComputedName(), client.Hostname())
	}
}

// assertValidStatus validates that a client's status has all required fields for proper operation.
// Checks self and peer status for essential data including hostinfo, tailscale IPs, endpoints,
// and network map presence. This test is not suitable for ACL/partial connection tests.
func assertValidStatus(t *testing.T, client TailscaleClient) {
	t.Helper()
	status, err := client.Status(true)
	if err != nil {
		t.Fatalf("getting status for %q: %s", client.Hostname(), err)
	}

	assert.NotEmptyf(t, status.Self.HostName, "%q does not have HostName set, likely missing Hostinfo", client.Hostname())
	assert.NotEmptyf(t, status.Self.OS, "%q does not have OS set, likely missing Hostinfo", client.Hostname())
	assert.NotEmptyf(t, status.Self.Relay, "%q does not have a relay, likely missing Hostinfo/Netinfo", client.Hostname())

	assert.NotEmptyf(t, status.Self.TailscaleIPs, "%q does not have Tailscale IPs", client.Hostname())

	// This does not seem to appear until version 1.56
	if status.Self.AllowedIPs != nil {
		assert.NotEmptyf(t, status.Self.AllowedIPs, "%q does not have any allowed IPs", client.Hostname())
	}

	assert.NotEmptyf(t, status.Self.Addrs, "%q does not have any endpoints", client.Hostname())

	assert.Truef(t, status.Self.Online, "%q is not online", client.Hostname())

	assert.Truef(t, status.Self.InNetworkMap, "%q is not in network map", client.Hostname())

	// This isn't really relevant for Self as it won't be in its own socket/wireguard.
	// assert.Truef(t, status.Self.InMagicSock, "%q is not tracked by magicsock", client.Hostname())
	// assert.Truef(t, status.Self.InEngine, "%q is not in wireguard engine", client.Hostname())

	for _, peer := range status.Peer {
		assert.NotEmptyf(t, peer.HostName, "peer (%s) of %q does not have HostName set, likely missing Hostinfo", peer.DNSName, client.Hostname())
		assert.NotEmptyf(t, peer.OS, "peer (%s) of %q does not have OS set, likely missing Hostinfo", peer.DNSName, client.Hostname())
		assert.NotEmptyf(t, peer.Relay, "peer (%s) of %q does not have a relay, likely missing Hostinfo/Netinfo", peer.DNSName, client.Hostname())

		assert.NotEmptyf(t, peer.TailscaleIPs, "peer (%s) of %q does not have Tailscale IPs", peer.DNSName, client.Hostname())

		// This does not seem to appear until version 1.56
		if peer.AllowedIPs != nil {
			assert.NotEmptyf(t, peer.AllowedIPs, "peer (%s) of %q does not have any allowed IPs", peer.DNSName, client.Hostname())
		}

		// Addrs does not seem to appear in the status from peers.
		// assert.NotEmptyf(t, peer.Addrs, "peer (%s) of %q does not have any endpoints", peer.DNSName, client.Hostname())

		assert.Truef(t, peer.Online, "peer (%s) of %q is not online", peer.DNSName, client.Hostname())

		assert.Truef(t, peer.InNetworkMap, "peer (%s) of %q is not in network map", peer.DNSName, client.Hostname())
		assert.Truef(t, peer.InMagicSock, "peer (%s) of %q is not tracked by magicsock", peer.DNSName, client.Hostname())

		// TODO(kradalby): InEngine is only true when a proper tunnel is set up,
		// there might be some interesting stuff to test here in the future.
		// assert.Truef(t, peer.InEngine, "peer (%s) of %q is not in wireguard engine", peer.DNSName, client.Hostname())
	}
}

// assertValidNetcheck validates that a client has a proper DERP relay configured.
// Ensures the client has discovered and selected a DERP server for relay functionality,
// which is essential for NAT traversal and connectivity in restricted networks.
func assertValidNetcheck(t *testing.T, client TailscaleClient) {
	t.Helper()
	report, err := client.Netcheck()
	if err != nil {
		t.Fatalf("getting netcheck report for %q: %s", client.Hostname(), err)
	}

	assert.NotEqualf(t, 0, report.PreferredDERP, "%q does not have a DERP relay", client.Hostname())
}

// assertCommandOutputContains executes a command with exponential backoff retry until the output
// contains the expected string or timeout is reached (10 seconds).
// This implements eventual consistency patterns and should be used instead of time.Sleep
// before executing commands that depend on network state propagation.
//
// Timeout: 10 seconds with exponential backoff
// Use cases: DNS resolution, route propagation, policy updates.
func assertCommandOutputContains(t *testing.T, c TailscaleClient, command []string, contains string) {
	t.Helper()

	_, err := backoff.Retry(t.Context(), func() (struct{}, error) {
		stdout, stderr, err := c.Execute(command)
		if err != nil {
			return struct{}{}, fmt.Errorf("executing command, stdout: %q stderr: %q, err: %w", stdout, stderr, err)
		}

		if !strings.Contains(stdout, contains) {
			return struct{}{}, fmt.Errorf("executing command, expected string %q not found in %q", contains, stdout)
		}

		return struct{}{}, nil
	}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(10*time.Second))

	assert.NoError(t, err)
}
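
// Typical usage (illustrative; the command and expected output are placeholders):
// wait for propagated state to show up in CLI output instead of sleeping.
//
//	assertCommandOutputContains(t, client, []string{"tailscale", "status"}, peerHostname)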

// dockertestMaxWait returns the maximum wait time for Docker-based test operations.
// Uses longer timeouts in CI environments to account for slower resource allocation
// and higher system load during automated testing.
func dockertestMaxWait() time.Duration {
	wait := 300 * time.Second //nolint

	if util.IsCI() {
		wait = 600 * time.Second //nolint
	}

	return wait
}

// didClientUseWebsocketForDERP analyzes client logs to determine if WebSocket was used for DERP.
// Searches for WebSocket connection indicators in client logs to validate
// DERP relay communication method for debugging connectivity issues.
func didClientUseWebsocketForDERP(t *testing.T, client TailscaleClient) bool {
	t.Helper()

	buf := &bytes.Buffer{}
	err := client.WriteLogs(buf, buf)
	if err != nil {
		t.Fatalf("failed to fetch client logs: %s: %s", client.Hostname(), err)
	}

	count, err := countMatchingLines(buf, func(line string) bool {
		return strings.Contains(line, "websocket: connected to ")
	})
	if err != nil {
		t.Fatalf("failed to process client logs: %s: %s", client.Hostname(), err)
	}

	return count > 0
}

// countMatchingLines counts lines in a reader that match the given predicate function.
// Uses optimized buffering for log analysis and provides flexible line-by-line
// filtering for log parsing and pattern matching in integration tests.
func countMatchingLines(in io.Reader, predicate func(string) bool) (int, error) {
	count := 0
	scanner := bufio.NewScanner(in)
	{
		const logBufferInitialSize = 1024 << 10 // preallocate 1 MiB
		buff := make([]byte, logBufferInitialSize)
		scanner.Buffer(buff, len(buff))
		scanner.Split(bufio.ScanLines)
	}

	for scanner.Scan() {
		if predicate(scanner.Text()) {
			count += 1
		}
	}

	return count, scanner.Err()
}
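
// Example (illustrative; "logs" is a placeholder string): counting error lines
// using only packages already imported by this file.
//
//	n, err := countMatchingLines(strings.NewReader(logs), func(line string) bool {
//		return strings.Contains(line, "ERROR")
//	})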

// wildcard returns a wildcard alias (*) for use in policy v2 configurations.
// Provides a convenient helper for creating permissive policy rules.
func wildcard() policyv2.Alias {
	return policyv2.Wildcard
}

// usernamep returns a pointer to a Username as an Alias for policy v2 configurations.
// Used in ACL rules to reference specific users in network access policies.
func usernamep(name string) policyv2.Alias {
	return ptr.To(policyv2.Username(name))
}

// hostp returns a pointer to a Host as an Alias for policy v2 configurations.
// Used in ACL rules to reference specific hosts in network access policies.
func hostp(name string) policyv2.Alias {
	return ptr.To(policyv2.Host(name))
}

// groupp returns a pointer to a Group as an Alias for policy v2 configurations.
// Used in ACL rules to reference user groups in network access policies.
func groupp(name string) policyv2.Alias {
	return ptr.To(policyv2.Group(name))
}

// tagp returns a pointer to a Tag as an Alias for policy v2 configurations.
// Used in ACL rules to reference node tags in network access policies.
func tagp(name string) policyv2.Alias {
	return ptr.To(policyv2.Tag(name))
}

// prefixp returns a pointer to a Prefix from a CIDR string for policy v2 configurations.
// Converts CIDR notation to policy prefix format for network range specifications.
func prefixp(cidr string) policyv2.Alias {
	prefix := netip.MustParsePrefix(cidr)
	return ptr.To(policyv2.Prefix(prefix))
}

// aliasWithPorts creates an AliasWithPorts structure from an alias and port ranges.
// Combines network targets with specific port restrictions for fine-grained
// access control in policy v2 configurations.
func aliasWithPorts(alias policyv2.Alias, ports ...tailcfg.PortRange) policyv2.AliasWithPorts {
	return policyv2.AliasWithPorts{
		Alias: alias,
		Ports: ports,
	}
}
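
// The alias helpers above are typically combined when building a policy for a
// test scenario. A minimal sketch (illustrative only; the ACL field names are
// assumed from the policyv2 package):
//
//	policy := policyv2.Policy{
//		ACLs: []policyv2.ACL{
//			{
//				Action:  "accept",
//				Sources: []policyv2.Alias{usernamep("user1@")},
//				Destinations: []policyv2.AliasWithPorts{
//					aliasWithPorts(wildcard(), tailcfg.PortRangeAny),
//				},
//			},
//		},
//	}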

// usernameOwner returns a Username as an Owner for use in TagOwners policies.
// Specifies which users can assign and manage specific tags in ACL configurations.
func usernameOwner(name string) policyv2.Owner {
	return ptr.To(policyv2.Username(name))
}

// groupOwner returns a Group as an Owner for use in TagOwners policies.
// Specifies which groups can assign and manage specific tags in ACL configurations.
func groupOwner(name string) policyv2.Owner {
	return ptr.To(policyv2.Group(name))
}

// usernameApprover returns a Username as an AutoApprover for subnet route policies.
// Specifies which users can automatically approve subnet route advertisements.
func usernameApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Username(name))
}

// groupApprover returns a Group as an AutoApprover for subnet route policies.
// Specifies which groups can automatically approve subnet route advertisements.
func groupApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Group(name))
}

// tagApprover returns a Tag as an AutoApprover for subnet route policies.
// Specifies which tagged nodes can automatically approve subnet route advertisements.
func tagApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Tag(name))
}

// oidcMockUser creates a MockUser for OIDC authentication testing.
// Generates consistent test user data with configurable email verification status
// for validating OIDC integration flows in headscale authentication tests.
func oidcMockUser(username string, emailVerified bool) mockoidc.MockUser {
	return mockoidc.MockUser{
		Subject:           username,
		PreferredUsername: username,
		Email:             username + "@headscale.net",
		EmailVerified:     emailVerified,
	}
}
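
// Example (illustrative): creating a verified test user for the mock OIDC provider.
//
//	u := oidcMockUser("user1", true)
//	// u.Email == "user1@headscale.net", u.EmailVerified == true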