
cmd/hi: improve reliability and remove backwards compatibility

- Add retry logic for container removal with exponential backoff
- Replace arbitrary sleep with deterministic container finalization checking
- Use label-based container discovery exclusively (remove time-based fallbacks)
- Add random hash to run IDs to prevent collisions (YYYYMMDD-HHMMSS-HASH)
- Create shared utility functions for consistent run ID handling (see the sketch below)
- Distinguish between expected and unexpected extraction errors
- Consolidate duplicate labeling calls in Tailscale container creation
- Remove backwards compatibility code for cleaner behavior
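As a rough illustration of the run ID scheme described above, here is a minimal sketch using the two helpers this commit introduces (dockertestutil.GenerateRunID and dockertestutil.ExtractRunIDFromContainerName); the main wrapper is hypothetical, and the round trip assumes the random hash itself contains no dashes:

package main

import (
	"fmt"

	"github.com/juanfont/headscale/integration/dockertestutil"
)

func main() {
	// GenerateRunID returns "YYYYMMDD-HHMMSS-HASH", e.g. "20250619-143052-a1b2c3".
	runID := dockertestutil.GenerateRunID()

	// The hi tool embeds the run ID in container names...
	containerName := "headscale-test-suite-" + runID

	// ...and ExtractRunIDFromContainerName recovers it from the last three
	// dash-separated fields (assumes the random hash contains no dashes).
	fmt.Println(dockertestutil.ExtractRunIDFromContainerName(containerName) == runID)
}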
Kristoffer Dalby 2025-06-19 15:19:52 +02:00
parent 8fae7edd60
commit 3c8e194d8b
5 changed files with 158 additions and 94 deletions

View File

@@ -10,6 +10,7 @@ import (
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/image"
"github.com/docker/docker/client"
"github.com/docker/docker/errdefs"
)
// cleanupBeforeTest performs cleanup operations before running tests.
@@ -66,10 +67,8 @@ func killTestContainers(ctx context.Context) error {
_ = cli.ContainerKill(ctx, cont.ID, "KILL")
}
// Then remove the container
if err := cli.ContainerRemove(ctx, cont.ID, container.RemoveOptions{
Force: true,
}); err == nil {
// Then remove the container with retry logic
if removeContainerWithRetry(ctx, cli, cont.ID) {
removed++
}
}
@@ -84,6 +83,32 @@ func killTestContainers(ctx context.Context) error {
return nil
}
// removeContainerWithRetry attempts to remove a container with exponential backoff retry logic.
func removeContainerWithRetry(ctx context.Context, cli *client.Client, containerID string) bool {
maxRetries := 3
baseDelay := 100 * time.Millisecond
for attempt := 0; attempt < maxRetries; attempt++ {
err := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{
Force: true,
})
if err == nil {
return true
}
// If this is the last attempt, don't wait
if attempt == maxRetries-1 {
break
}
// Wait with exponential backoff
delay := baseDelay * time.Duration(1<<attempt)
time.Sleep(delay)
}
return false
}
// pruneDockerNetworks removes unused Docker networks.
func pruneDockerNetworks(ctx context.Context) error {
cli, err := createDockerClient()
@@ -167,7 +192,13 @@ func cleanCacheVolume(ctx context.Context) error {
volumeName := "hs-integration-go-cache"
err = cli.VolumeRemove(ctx, volumeName, true)
if err != nil {
fmt.Printf("Go module cache volume not found or already removed\n")
if errdefs.IsNotFound(err) {
fmt.Printf("Go module cache volume not found: %s\n", volumeName)
} else if errdefs.IsConflict(err) {
fmt.Printf("Go module cache volume is in use and cannot be removed: %s\n", volumeName)
} else {
fmt.Printf("Failed to remove Go module cache volume %s: %v\n", volumeName, err)
}
} else {
fmt.Printf("Removed Go module cache volume: %s\n", volumeName)
}

View File

@@ -19,6 +19,7 @@ import (
"github.com/docker/docker/api/types/mount"
"github.com/docker/docker/client"
"github.com/docker/docker/pkg/stdcopy"
"github.com/juanfont/headscale/integration/dockertestutil"
)
var (
@@ -35,7 +36,7 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
}
defer cli.Close()
runID := generateRunID()
runID := dockertestutil.GenerateRunID()
containerName := "headscale-test-suite-" + runID
logsDir := filepath.Join(config.LogsDir, runID)
@@ -91,8 +92,10 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
exitCode, err := streamAndWait(ctx, cli, resp.ID)
// Give the container a moment to flush any final artifacts
time.Sleep(2 * time.Second)
// Ensure all containers have finished and logs are flushed before extracting artifacts
if waitErr := waitForContainerFinalization(ctx, cli, resp.ID, config.Verbose); waitErr != nil && config.Verbose {
log.Printf("Warning: failed to wait for container finalization: %v", waitErr)
}
// Extract artifacts from test containers before cleanup
if err := extractArtifactsFromContainers(ctx, resp.ID, logsDir, config.Verbose); err != nil && config.Verbose {
@@ -152,7 +155,7 @@ func createGoTestContainer(ctx context.Context, cli *client.Client, config *RunC
projectRoot := findProjectRoot(pwd)
runID := generateRunIDFromContainerName(containerName)
runID := dockertestutil.ExtractRunIDFromContainerName(containerName)
env := []string{
fmt.Sprintf("HEADSCALE_INTEGRATION_POSTGRES=%d", boolToInt(config.UsePostgres)),
@@ -225,23 +228,69 @@ func streamAndWait(ctx context.Context, cli *client.Client, containerID string)
return -1, ErrUnexpectedContainerWait
}
// generateRunID creates a unique timestamp-based run identifier.
func generateRunID() string {
now := time.Now()
timestamp := now.Format("20060102-150405")
return timestamp
// waitForContainerFinalization ensures all test containers have properly finished and flushed their output.
func waitForContainerFinalization(ctx context.Context, cli *client.Client, testContainerID string, verbose bool) error {
// First, get all related test containers
containers, err := cli.ContainerList(ctx, container.ListOptions{All: true})
if err != nil {
return fmt.Errorf("failed to list containers: %w", err)
}
testContainers := getCurrentTestContainers(containers, testContainerID, verbose)
// Wait for all test containers to reach a final state
maxWaitTime := 10 * time.Second
checkInterval := 500 * time.Millisecond
timeout := time.After(maxWaitTime)
ticker := time.NewTicker(checkInterval)
defer ticker.Stop()
for {
select {
case <-timeout:
if verbose {
log.Printf("Timeout waiting for container finalization, proceeding with artifact extraction")
}
return nil
case <-ticker.C:
allFinalized := true
for _, testCont := range testContainers {
inspect, err := cli.ContainerInspect(ctx, testCont.ID)
if err != nil {
if verbose {
log.Printf("Warning: failed to inspect container %s: %v", testCont.name, err)
}
continue
}
// Check if container is in a final state
if !isContainerFinalized(inspect.State) {
allFinalized = false
if verbose {
log.Printf("Container %s still finalizing (state: %s)", testCont.name, inspect.State.Status)
}
break
}
}
if allFinalized {
if verbose {
log.Printf("All test containers finalized, ready for artifact extraction")
}
return nil
}
}
}
}
// generateRunIDFromContainerName extracts the run ID from container name.
func generateRunIDFromContainerName(containerName string) string {
// Extract run ID from container name like "headscale-test-suite-20250618-143802"
parts := strings.Split(containerName, "-")
if len(parts) >= 2 {
return strings.Join(parts[len(parts)-2:], "-")
}
return containerName
// isContainerFinalized checks if a container has reached a final state where logs are flushed.
func isContainerFinalized(state *container.State) bool {
// Container is finalized if it's not running and has a finish time
return !state.Running && state.FinishedAt != ""
}
// findProjectRoot locates the project root by finding the directory containing go.mod.
func findProjectRoot(startPath string) string {
current := startPath
@@ -466,11 +515,8 @@ func getCurrentTestContainers(containers []container.Summary, testContainerID st
}
if runID == "" {
if verbose {
log.Printf("Warning: could not find run ID for test container %s, falling back to time-based filtering", testContainerID[:12])
}
// Fallback to time-based filtering for backward compatibility
return getCurrentTestContainersByTime(containers, testContainerID, verbose)
log.Printf("Error: test container %s missing required hi.run-id label", testContainerID[:12])
return testRunContainers
}
if verbose {
@@ -570,78 +616,39 @@ func extractContainerLogs(ctx context.Context, cli *client.Client, containerID,
return nil
}
// getCurrentTestContainersByTime is a fallback method for containers without labels.
func getCurrentTestContainersByTime(containers []container.Summary, testContainerID string, verbose bool) []testContainer {
var testRunContainers []testContainer
// Find the test container to get its creation time
var testContainerCreated time.Time
for _, cont := range containers {
if cont.ID == testContainerID {
testContainerCreated = time.Unix(cont.Created, 0)
break
}
}
if testContainerCreated.IsZero() {
if verbose {
log.Printf("Warning: could not find test container %s", testContainerID[:12])
}
return testRunContainers
}
// Find containers created within a small time window after the test container
startTime := testContainerCreated
endTime := testContainerCreated.Add(5 * time.Minute)
for _, cont := range containers {
for _, name := range cont.Names {
containerName := strings.TrimPrefix(name, "/")
if strings.HasPrefix(containerName, "hs-") || strings.HasPrefix(containerName, "ts-") {
createdTime := time.Unix(cont.Created, 0)
if createdTime.After(startTime) && createdTime.Before(endTime) {
testRunContainers = append(testRunContainers, testContainer{
ID: cont.ID,
name: containerName,
})
if verbose {
log.Printf("Including container %s (created %s)", containerName, createdTime.Format("15:04:05"))
}
}
break
}
}
}
return testRunContainers
}
// extractContainerFiles extracts database file and directories from headscale containers.
func extractContainerFiles(ctx context.Context, cli *client.Client, containerID, containerName, logsDir string, verbose bool) error {
// Extract database file
if err := extractSingleFile(ctx, cli, containerID, "/tmp/integration_test_db.sqlite3", containerName+".db", logsDir, verbose); err != nil {
if verbose {
log.Printf("Warning: failed to extract database from %s: %v", containerName, err)
}
logExtractionError("database", containerName, err, verbose)
}
// Extract profile directory
if err := extractDirectory(ctx, cli, containerID, "/tmp/profile", containerName+".pprof", logsDir, verbose); err != nil {
if verbose {
log.Printf("Warning: failed to extract profile from %s: %v", containerName, err)
}
logExtractionError("profile directory", containerName, err, verbose)
}
// Extract map responses directory
if err := extractDirectory(ctx, cli, containerID, "/tmp/mapresponses", containerName+".mapresp", logsDir, verbose); err != nil {
if verbose {
log.Printf("Warning: failed to extract mapresponses from %s: %v", containerName, err)
}
logExtractionError("mapresponses directory", containerName, err, verbose)
}
return nil
}
// logExtractionError logs extraction errors with appropriate level based on error type.
func logExtractionError(artifactType, containerName string, err error, verbose bool) {
if errors.Is(err, ErrFileNotFoundInTar) {
// File not found is expected and only logged in verbose mode
if verbose {
log.Printf("No %s found in container %s", artifactType, containerName)
}
} else {
// Other errors are actual failures and should be logged as warnings
log.Printf("Warning: failed to extract %s from %s: %v", artifactType, containerName, err)
}
}
// extractSingleFile copies a single file from a container.
func extractSingleFile(ctx context.Context, cli *client.Client, containerID, sourcePath, fileName, logsDir string, verbose bool) error {
tarReader, _, err := cli.CopyFromContainer(ctx, containerID, sourcePath)

View File

@@ -2,6 +2,7 @@ package main
import (
"archive/tar"
"errors"
"fmt"
"io"
"os"
@@ -9,6 +10,11 @@ import (
"strings"
)
var (
// ErrFileNotFoundInTar indicates a file was not found in the tar archive.
ErrFileNotFoundInTar = errors.New("file not found in tar")
)
// extractFileFromTar extracts a single file from a tar reader.
func extractFileFromTar(tarReader io.Reader, fileName, outputPath string) error {
tr := tar.NewReader(tarReader)
@@ -41,7 +47,7 @@ func extractFileFromTar(tarReader io.Reader, fileName, outputPath string) error
}
}
return fmt.Errorf("file %s not found in tar", fileName)
return fmt.Errorf("%w: %s", ErrFileNotFoundInTar, fileName)
}
// extractDirectoryFromTar extracts all files from a tar reader to a target directory.

View File

@@ -1,8 +1,12 @@
package dockertestutil
import (
"fmt"
"os"
"strings"
"time"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/ory/dockertest/v3"
)
@@ -18,8 +22,7 @@ func GetIntegrationRunID() string {
func DockerAddIntegrationLabels(opts *dockertest.RunOptions, testType string) {
runID := GetIntegrationRunID()
if runID == "" {
// If no run ID is set, do nothing for backward compatibility
return
panic("HEADSCALE_INTEGRATION_RUN_ID environment variable is required")
}
if opts.Labels == nil {
@@ -29,6 +32,29 @@ func DockerAddIntegrationLabels(opts *dockertest.RunOptions, testType string) {
opts.Labels["hi.test-type"] = testType
}
// GenerateRunID creates a unique run identifier with timestamp and random hash.
// Format: YYYYMMDD-HHMMSS-HASH (e.g., 20250619-143052-a1b2c3)
func GenerateRunID() string {
now := time.Now()
timestamp := now.Format("20060102-150405")
// Add a short random hash to ensure uniqueness
randomHash := util.MustGenerateRandomStringDNSSafe(6)
return fmt.Sprintf("%s-%s", timestamp, randomHash)
}
// ExtractRunIDFromContainerName extracts the run ID from container name.
// Expects format: "prefix-YYYYMMDD-HHMMSS-HASH"
func ExtractRunIDFromContainerName(containerName string) string {
parts := strings.Split(containerName, "-")
if len(parts) >= 3 {
// Return the last three parts as the run ID (YYYYMMDD-HHMMSS-HASH)
return strings.Join(parts[len(parts)-3:], "-")
}
panic(fmt.Sprintf("unexpected container name format: %s", containerName))
}
// IsRunningInContainer checks if the current process is running inside a Docker container.
// This is used by tests to determine if they should run integration tests.
func IsRunningInContainer() bool {

View File

@@ -280,6 +280,9 @@ func New(
return nil, err
}
// Add integration test labels if running under hi tool
dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
var container *dockertest.Resource
if version != VersionHead {
@@ -311,9 +314,6 @@
)
}
// Add integration test labels if running under hi tool
dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
container, err = pool.BuildAndRunWithBuildOptions(
buildOptions,
tailscaleOptions,
@@ -325,9 +325,6 @@
tailscaleOptions.Repository = "tailscale/tailscale"
tailscaleOptions.Tag = version
// Add integration test labels if running under hi tool
dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
container, err = pool.RunWithOptions(
tailscaleOptions,
dockertestutil.DockerRestartPolicy,
@@ -338,9 +335,6 @@
tailscaleOptions.Repository = "tailscale/tailscale"
tailscaleOptions.Tag = "v" + version
// Add integration test labels if running under hi tool
dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
container, err = pool.RunWithOptions(
tailscaleOptions,
dockertestutil.DockerRestartPolicy,