From 3c8e194d8bead1a74f73e49f59d88e51328a0c3a Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Thu, 19 Jun 2025 15:19:52 +0200 Subject: [PATCH] cmd/hi: improve reliability and remove backwards compatibility - Add retry logic for container removal with exponential backoff - Replace arbitrary sleep with deterministic container finalization checking - Use label-based container discovery exclusively (remove time-based fallbacks) - Add random hash to run IDs to prevent collisions (YYYYMMDD-HHMMSS-HASH) - Create shared utility functions for consistent run ID handling - Distinguish between expected and unexpected extraction errors - Consolidate duplicate labeling calls in Tailscale container creation - Remove backwards compatibility code for cleaner behavior --- cmd/hi/cleanup.go | 41 ++++++- cmd/hi/docker.go | 161 ++++++++++++++------------- cmd/hi/tar_utils.go | 8 +- integration/dockertestutil/config.go | 30 ++++- integration/tsic/tsic.go | 12 +- 5 files changed, 158 insertions(+), 94 deletions(-) diff --git a/cmd/hi/cleanup.go b/cmd/hi/cleanup.go index 17fdb594..080266d8 100644 --- a/cmd/hi/cleanup.go +++ b/cmd/hi/cleanup.go @@ -10,6 +10,7 @@ import ( "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/image" "github.com/docker/docker/client" + "github.com/docker/docker/errdefs" ) // cleanupBeforeTest performs cleanup operations before running tests. @@ -66,10 +67,8 @@ func killTestContainers(ctx context.Context) error { _ = cli.ContainerKill(ctx, cont.ID, "KILL") } - // Then remove the container - if err := cli.ContainerRemove(ctx, cont.ID, container.RemoveOptions{ - Force: true, - }); err == nil { + // Then remove the container with retry logic + if removeContainerWithRetry(ctx, cli, cont.ID) { removed++ } } @@ -84,6 +83,32 @@ func killTestContainers(ctx context.Context) error { return nil } +// removeContainerWithRetry attempts to remove a container with exponential backoff retry logic. +func removeContainerWithRetry(ctx context.Context, cli *client.Client, containerID string) bool { + maxRetries := 3 + baseDelay := 100 * time.Millisecond + + for attempt := 0; attempt < maxRetries; attempt++ { + err := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{ + Force: true, + }) + if err == nil { + return true + } + + // If this is the last attempt, don't wait + if attempt == maxRetries-1 { + break + } + + // Wait with exponential backoff + delay := baseDelay * time.Duration(1<= 2 { - return strings.Join(parts[len(parts)-2:], "-") - } - return containerName +// isContainerFinalized checks if a container has reached a final state where logs are flushed. +func isContainerFinalized(state *container.State) bool { + // Container is finalized if it's not running and has a finish time + return !state.Running && state.FinishedAt != "" } + // findProjectRoot locates the project root by finding the directory containing go.mod. func findProjectRoot(startPath string) string { current := startPath @@ -466,11 +515,8 @@ func getCurrentTestContainers(containers []container.Summary, testContainerID st } if runID == "" { - if verbose { - log.Printf("Warning: could not find run ID for test container %s, falling back to time-based filtering", testContainerID[:12]) - } - // Fallback to time-based filtering for backward compatibility - return getCurrentTestContainersByTime(containers, testContainerID, verbose) + log.Printf("Error: test container %s missing required hi.run-id label", testContainerID[:12]) + return testRunContainers } if verbose { @@ -570,78 +616,39 @@ func extractContainerLogs(ctx context.Context, cli *client.Client, containerID, return nil } -// getCurrentTestContainersByTime is a fallback method for containers without labels. -func getCurrentTestContainersByTime(containers []container.Summary, testContainerID string, verbose bool) []testContainer { - var testRunContainers []testContainer - - // Find the test container to get its creation time - var testContainerCreated time.Time - for _, cont := range containers { - if cont.ID == testContainerID { - testContainerCreated = time.Unix(cont.Created, 0) - break - } - } - - if testContainerCreated.IsZero() { - if verbose { - log.Printf("Warning: could not find test container %s", testContainerID[:12]) - } - return testRunContainers - } - - // Find containers created within a small time window after the test container - startTime := testContainerCreated - endTime := testContainerCreated.Add(5 * time.Minute) - - for _, cont := range containers { - for _, name := range cont.Names { - containerName := strings.TrimPrefix(name, "/") - if strings.HasPrefix(containerName, "hs-") || strings.HasPrefix(containerName, "ts-") { - createdTime := time.Unix(cont.Created, 0) - if createdTime.After(startTime) && createdTime.Before(endTime) { - testRunContainers = append(testRunContainers, testContainer{ - ID: cont.ID, - name: containerName, - }) - if verbose { - log.Printf("Including container %s (created %s)", containerName, createdTime.Format("15:04:05")) - } - } - break - } - } - } - - return testRunContainers -} - // extractContainerFiles extracts database file and directories from headscale containers. func extractContainerFiles(ctx context.Context, cli *client.Client, containerID, containerName, logsDir string, verbose bool) error { // Extract database file if err := extractSingleFile(ctx, cli, containerID, "/tmp/integration_test_db.sqlite3", containerName+".db", logsDir, verbose); err != nil { - if verbose { - log.Printf("Warning: failed to extract database from %s: %v", containerName, err) - } + logExtractionError("database", containerName, err, verbose) } // Extract profile directory if err := extractDirectory(ctx, cli, containerID, "/tmp/profile", containerName+".pprof", logsDir, verbose); err != nil { - if verbose { - log.Printf("Warning: failed to extract profile from %s: %v", containerName, err) - } + logExtractionError("profile directory", containerName, err, verbose) } // Extract map responses directory if err := extractDirectory(ctx, cli, containerID, "/tmp/mapresponses", containerName+".mapresp", logsDir, verbose); err != nil { - if verbose { - log.Printf("Warning: failed to extract mapresponses from %s: %v", containerName, err) - } + logExtractionError("mapresponses directory", containerName, err, verbose) } return nil } +// logExtractionError logs extraction errors with appropriate level based on error type. +func logExtractionError(artifactType, containerName string, err error, verbose bool) { + if errors.Is(err, ErrFileNotFoundInTar) { + // File not found is expected and only logged in verbose mode + if verbose { + log.Printf("No %s found in container %s", artifactType, containerName) + } + } else { + // Other errors are actual failures and should be logged as warnings + log.Printf("Warning: failed to extract %s from %s: %v", artifactType, containerName, err) + } +} + // extractSingleFile copies a single file from a container. func extractSingleFile(ctx context.Context, cli *client.Client, containerID, sourcePath, fileName, logsDir string, verbose bool) error { tarReader, _, err := cli.CopyFromContainer(ctx, containerID, sourcePath) diff --git a/cmd/hi/tar_utils.go b/cmd/hi/tar_utils.go index 46f21987..16fb8793 100644 --- a/cmd/hi/tar_utils.go +++ b/cmd/hi/tar_utils.go @@ -2,6 +2,7 @@ package main import ( "archive/tar" + "errors" "fmt" "io" "os" @@ -9,6 +10,11 @@ import ( "strings" ) +var ( + // ErrFileNotFoundInTar indicates a file was not found in the tar archive. + ErrFileNotFoundInTar = errors.New("file not found in tar") +) + // extractFileFromTar extracts a single file from a tar reader. func extractFileFromTar(tarReader io.Reader, fileName, outputPath string) error { tr := tar.NewReader(tarReader) @@ -41,7 +47,7 @@ func extractFileFromTar(tarReader io.Reader, fileName, outputPath string) error } } - return fmt.Errorf("file %s not found in tar", fileName) + return fmt.Errorf("%w: %s", ErrFileNotFoundInTar, fileName) } // extractDirectoryFromTar extracts all files from a tar reader to a target directory. diff --git a/integration/dockertestutil/config.go b/integration/dockertestutil/config.go index 269f103a..f8bbde5f 100644 --- a/integration/dockertestutil/config.go +++ b/integration/dockertestutil/config.go @@ -1,8 +1,12 @@ package dockertestutil import ( + "fmt" "os" + "strings" + "time" + "github.com/juanfont/headscale/hscontrol/util" "github.com/ory/dockertest/v3" ) @@ -18,8 +22,7 @@ func GetIntegrationRunID() string { func DockerAddIntegrationLabels(opts *dockertest.RunOptions, testType string) { runID := GetIntegrationRunID() if runID == "" { - // If no run ID is set, do nothing for backward compatibility - return + panic("HEADSCALE_INTEGRATION_RUN_ID environment variable is required") } if opts.Labels == nil { @@ -29,6 +32,29 @@ func DockerAddIntegrationLabels(opts *dockertest.RunOptions, testType string) { opts.Labels["hi.test-type"] = testType } +// GenerateRunID creates a unique run identifier with timestamp and random hash. +// Format: YYYYMMDD-HHMMSS-HASH (e.g., 20250619-143052-a1b2c3) +func GenerateRunID() string { + now := time.Now() + timestamp := now.Format("20060102-150405") + + // Add a short random hash to ensure uniqueness + randomHash := util.MustGenerateRandomStringDNSSafe(6) + return fmt.Sprintf("%s-%s", timestamp, randomHash) +} + +// ExtractRunIDFromContainerName extracts the run ID from container name. +// Expects format: "prefix-YYYYMMDD-HHMMSS-HASH" +func ExtractRunIDFromContainerName(containerName string) string { + parts := strings.Split(containerName, "-") + if len(parts) >= 3 { + // Return the last three parts as the run ID (YYYYMMDD-HHMMSS-HASH) + return strings.Join(parts[len(parts)-3:], "-") + } + + panic(fmt.Sprintf("unexpected container name format: %s", containerName)) +} + // IsRunningInContainer checks if the current process is running inside a Docker container. // This is used by tests to determine if they should run integration tests. func IsRunningInContainer() bool { diff --git a/integration/tsic/tsic.go b/integration/tsic/tsic.go index cad60816..d2738c55 100644 --- a/integration/tsic/tsic.go +++ b/integration/tsic/tsic.go @@ -280,6 +280,9 @@ func New( return nil, err } + // Add integration test labels if running under hi tool + dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale") + var container *dockertest.Resource if version != VersionHead { @@ -311,9 +314,6 @@ func New( ) } - // Add integration test labels if running under hi tool - dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale") - container, err = pool.BuildAndRunWithBuildOptions( buildOptions, tailscaleOptions, @@ -325,9 +325,6 @@ func New( tailscaleOptions.Repository = "tailscale/tailscale" tailscaleOptions.Tag = version - // Add integration test labels if running under hi tool - dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale") - container, err = pool.RunWithOptions( tailscaleOptions, dockertestutil.DockerRestartPolicy, @@ -338,9 +335,6 @@ func New( tailscaleOptions.Repository = "tailscale/tailscale" tailscaleOptions.Tag = "v" + version - // Add integration test labels if running under hi tool - dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale") - container, err = pool.RunWithOptions( tailscaleOptions, dockertestutil.DockerRestartPolicy,