Mirror of https://github.com/juanfont/headscale.git (synced 2025-08-10)
cmd/hi: improve reliability and remove backwards compatibility
- Add retry logic for container removal with exponential backoff
- Replace arbitrary sleep with deterministic container finalization checking
- Use label-based container discovery exclusively (remove time-based fallbacks)
- Add random hash to run IDs to prevent collisions (YYYYMMDD-HHMMSS-HASH)
- Create shared utility functions for consistent run ID handling
- Distinguish between expected and unexpected extraction errors
- Consolidate duplicate labeling calls in Tailscale container creation
- Remove backwards compatibility code for cleaner behavior
parent 8fae7edd60
commit 3c8e194d8b
@@ -10,6 +10,7 @@ import (
     "github.com/docker/docker/api/types/filters"
     "github.com/docker/docker/api/types/image"
     "github.com/docker/docker/client"
+    "github.com/docker/docker/errdefs"
 )

 // cleanupBeforeTest performs cleanup operations before running tests.
@@ -66,10 +67,8 @@ func killTestContainers(ctx context.Context) error {
         _ = cli.ContainerKill(ctx, cont.ID, "KILL")
     }

-    // Then remove the container
-    if err := cli.ContainerRemove(ctx, cont.ID, container.RemoveOptions{
-        Force: true,
-    }); err == nil {
+    // Then remove the container with retry logic
+    if removeContainerWithRetry(ctx, cli, cont.ID) {
         removed++
     }
 }
@@ -84,6 +83,32 @@ func killTestContainers(ctx context.Context) error {
     return nil
 }

+// removeContainerWithRetry attempts to remove a container with exponential backoff retry logic.
+func removeContainerWithRetry(ctx context.Context, cli *client.Client, containerID string) bool {
+    maxRetries := 3
+    baseDelay := 100 * time.Millisecond
+
+    for attempt := 0; attempt < maxRetries; attempt++ {
+        err := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{
+            Force: true,
+        })
+        if err == nil {
+            return true
+        }
+
+        // If this is the last attempt, don't wait
+        if attempt == maxRetries-1 {
+            break
+        }
+
+        // Wait with exponential backoff
+        delay := baseDelay * time.Duration(1<<attempt)
+        time.Sleep(delay)
+    }
+
+    return false
+}
+
 // pruneDockerNetworks removes unused Docker networks.
 func pruneDockerNetworks(ctx context.Context) error {
     cli, err := createDockerClient()
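For intuition, the retry schedule above sleeps 100 ms after the first failed attempt and 200 ms after the second; there is no wait after the final attempt. A standalone sketch of the shift-based delay computation (illustrative only, not part of the commit):

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        baseDelay := 100 * time.Millisecond
        for attempt := 0; attempt < 3; attempt++ {
            // Same doubling as removeContainerWithRetry: 100ms, 200ms, 400ms.
            delay := baseDelay * time.Duration(1<<attempt)
            fmt.Printf("attempt %d: backoff %v\n", attempt, delay)
        }
    }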
@@ -167,7 +192,13 @@ func cleanCacheVolume(ctx context.Context) error {
     volumeName := "hs-integration-go-cache"
     err = cli.VolumeRemove(ctx, volumeName, true)
     if err != nil {
-        fmt.Printf("Go module cache volume not found or already removed\n")
+        if errdefs.IsNotFound(err) {
+            fmt.Printf("Go module cache volume not found: %s\n", volumeName)
+        } else if errdefs.IsConflict(err) {
+            fmt.Printf("Go module cache volume is in use and cannot be removed: %s\n", volumeName)
+        } else {
+            fmt.Printf("Failed to remove Go module cache volume %s: %v\n", volumeName, err)
+        }
     } else {
         fmt.Printf("Removed Go module cache volume: %s\n", volumeName)
     }
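For context, the errdefs package classifies Docker API errors by kind, which is what lets this branch tell a missing volume apart from one still referenced by a container. A minimal, self-contained sketch of the same classification (the client setup and messages are illustrative assumptions, not code from this commit):

    package main

    import (
        "context"
        "fmt"

        "github.com/docker/docker/client"
        "github.com/docker/docker/errdefs"
    )

    func main() {
        cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
        if err != nil {
            panic(err)
        }
        defer cli.Close()

        // Classify the failure instead of printing one generic message.
        err = cli.VolumeRemove(context.Background(), "hs-integration-go-cache", true)
        switch {
        case err == nil:
            fmt.Println("removed")
        case errdefs.IsNotFound(err):
            fmt.Println("volume does not exist") // expected on a clean host
        case errdefs.IsConflict(err):
            fmt.Println("volume is still in use") // a container references it
        default:
            fmt.Printf("unexpected error: %v\n", err)
        }
    }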
cmd/hi/docker.go (161 changed lines)
@@ -19,6 +19,7 @@ import (
     "github.com/docker/docker/api/types/mount"
     "github.com/docker/docker/client"
     "github.com/docker/docker/pkg/stdcopy"
+    "github.com/juanfont/headscale/integration/dockertestutil"
 )

 var (
@@ -35,7 +36,7 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
     }
     defer cli.Close()

-    runID := generateRunID()
+    runID := dockertestutil.GenerateRunID()
     containerName := "headscale-test-suite-" + runID
     logsDir := filepath.Join(config.LogsDir, runID)
@@ -91,8 +92,10 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
     exitCode, err := streamAndWait(ctx, cli, resp.ID)

-    // Give the container a moment to flush any final artifacts
-    time.Sleep(2 * time.Second)
+    // Ensure all containers have finished and logs are flushed before extracting artifacts
+    if waitErr := waitForContainerFinalization(ctx, cli, resp.ID, config.Verbose); waitErr != nil && config.Verbose {
+        log.Printf("Warning: failed to wait for container finalization: %v", waitErr)
+    }

     // Extract artifacts from test containers before cleanup
     if err := extractArtifactsFromContainers(ctx, resp.ID, logsDir, config.Verbose); err != nil && config.Verbose {
@@ -152,7 +155,7 @@ func createGoTestContainer(ctx context.Context, cli *client.Client, config *RunC
     projectRoot := findProjectRoot(pwd)

-    runID := generateRunIDFromContainerName(containerName)
+    runID := dockertestutil.ExtractRunIDFromContainerName(containerName)

     env := []string{
         fmt.Sprintf("HEADSCALE_INTEGRATION_POSTGRES=%d", boolToInt(config.UsePostgres)),
@@ -225,23 +228,69 @@ func streamAndWait(ctx context.Context, cli *client.Client, containerID string)
     return -1, ErrUnexpectedContainerWait
 }

-// generateRunID creates a unique timestamp-based run identifier.
-func generateRunID() string {
-    now := time.Now()
-    timestamp := now.Format("20060102-150405")
-    return timestamp
-}
+// waitForContainerFinalization ensures all test containers have properly finished and flushed their output.
+func waitForContainerFinalization(ctx context.Context, cli *client.Client, testContainerID string, verbose bool) error {
+    // First, get all related test containers
+    containers, err := cli.ContainerList(ctx, container.ListOptions{All: true})
+    if err != nil {
+        return fmt.Errorf("failed to list containers: %w", err)
+    }
+
+    testContainers := getCurrentTestContainers(containers, testContainerID, verbose)
+
+    // Wait for all test containers to reach a final state
+    maxWaitTime := 10 * time.Second
+    checkInterval := 500 * time.Millisecond
+    timeout := time.After(maxWaitTime)
+    ticker := time.NewTicker(checkInterval)
+    defer ticker.Stop()
+
+    for {
+        select {
+        case <-timeout:
+            if verbose {
+                log.Printf("Timeout waiting for container finalization, proceeding with artifact extraction")
+            }
+            return nil
+        case <-ticker.C:
+            allFinalized := true
+
+            for _, testCont := range testContainers {
+                inspect, err := cli.ContainerInspect(ctx, testCont.ID)
+                if err != nil {
+                    if verbose {
+                        log.Printf("Warning: failed to inspect container %s: %v", testCont.name, err)
+                    }
+                    continue
+                }
+
+                // Check if container is in a final state
+                if !isContainerFinalized(inspect.State) {
+                    allFinalized = false
+                    if verbose {
+                        log.Printf("Container %s still finalizing (state: %s)", testCont.name, inspect.State.Status)
+                    }
+                    break
+                }
+            }
+
+            if allFinalized {
+                if verbose {
+                    log.Printf("All test containers finalized, ready for artifact extraction")
+                }
+                return nil
+            }
+        }
+    }
+}

-// generateRunIDFromContainerName extracts the run ID from container name.
-func generateRunIDFromContainerName(containerName string) string {
-    // Extract run ID from container name like "headscale-test-suite-20250618-143802"
-    parts := strings.Split(containerName, "-")
-    if len(parts) >= 2 {
-        return strings.Join(parts[len(parts)-2:], "-")
-    }
-    return containerName
-}
+// isContainerFinalized checks if a container has reached a final state where logs are flushed.
+func isContainerFinalized(state *container.State) bool {
+    // Container is finalized if it's not running and has a finish time
+    return !state.Running && state.FinishedAt != ""
+}

 // findProjectRoot locates the project root by finding the directory containing go.mod.
 func findProjectRoot(startPath string) string {
     current := startPath
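The new wait is a bounded poll: a ticker drives periodic inspection, and a timeout channel caps the total wait at 10 seconds, after which extraction proceeds anyway. A minimal sketch of that select-loop pattern in isolation (pollUntil and its closure are hypothetical names, not part of the commit):

    package main

    import (
        "fmt"
        "time"
    )

    // pollUntil re-checks a condition on each tick and gives up after maxWait,
    // mirroring the timeout/ticker select in waitForContainerFinalization.
    func pollUntil(done func() bool, maxWait, interval time.Duration) bool {
        timeout := time.After(maxWait)
        ticker := time.NewTicker(interval)
        defer ticker.Stop()

        for {
            select {
            case <-timeout:
                return false // caller proceeds anyway, as the diff does
            case <-ticker.C:
                if done() {
                    return true
                }
            }
        }
    }

    func main() {
        start := time.Now()
        ok := pollUntil(func() bool { return time.Since(start) > time.Second },
            10*time.Second, 500*time.Millisecond)
        fmt.Println("finalized:", ok)
    }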
@@ -466,11 +515,8 @@ func getCurrentTestContainers(containers []container.Summary, testContainerID st
     }

     if runID == "" {
-        if verbose {
-            log.Printf("Warning: could not find run ID for test container %s, falling back to time-based filtering", testContainerID[:12])
-        }
-        // Fallback to time-based filtering for backward compatibility
-        return getCurrentTestContainersByTime(containers, testContainerID, verbose)
+        log.Printf("Error: test container %s missing required hi.run-id label", testContainerID[:12])
+        return testRunContainers
     }

     if verbose {
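With the time-based fallback gone, discovery rests entirely on labels, so a run's containers can also be listed directly by filtering on the hi.run-id label that DockerAddIntegrationLabels stamps onto them. A hedged sketch of that query (the run-ID value and client setup are illustrative assumptions):

    package main

    import (
        "context"
        "fmt"

        "github.com/docker/docker/api/types/container"
        "github.com/docker/docker/api/types/filters"
        "github.com/docker/docker/client"
    )

    func main() {
        cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
        if err != nil {
            panic(err)
        }
        defer cli.Close()

        // Only containers stamped with this exact run ID are returned.
        f := filters.NewArgs(filters.Arg("label", "hi.run-id=20250619-143052-a1b2c3"))
        list, err := cli.ContainerList(context.Background(), container.ListOptions{All: true, Filters: f})
        if err != nil {
            panic(err)
        }
        for _, c := range list {
            fmt.Println(c.ID[:12], c.Names)
        }
    }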
@@ -570,78 +616,39 @@ func extractContainerLogs(ctx context.Context, cli *client.Client, containerID,
     return nil
 }

-// getCurrentTestContainersByTime is a fallback method for containers without labels.
-func getCurrentTestContainersByTime(containers []container.Summary, testContainerID string, verbose bool) []testContainer {
-    var testRunContainers []testContainer
-
-    // Find the test container to get its creation time
-    var testContainerCreated time.Time
-    for _, cont := range containers {
-        if cont.ID == testContainerID {
-            testContainerCreated = time.Unix(cont.Created, 0)
-            break
-        }
-    }
-
-    if testContainerCreated.IsZero() {
-        if verbose {
-            log.Printf("Warning: could not find test container %s", testContainerID[:12])
-        }
-        return testRunContainers
-    }
-
-    // Find containers created within a small time window after the test container
-    startTime := testContainerCreated
-    endTime := testContainerCreated.Add(5 * time.Minute)
-
-    for _, cont := range containers {
-        for _, name := range cont.Names {
-            containerName := strings.TrimPrefix(name, "/")
-            if strings.HasPrefix(containerName, "hs-") || strings.HasPrefix(containerName, "ts-") {
-                createdTime := time.Unix(cont.Created, 0)
-                if createdTime.After(startTime) && createdTime.Before(endTime) {
-                    testRunContainers = append(testRunContainers, testContainer{
-                        ID:   cont.ID,
-                        name: containerName,
-                    })
-                    if verbose {
-                        log.Printf("Including container %s (created %s)", containerName, createdTime.Format("15:04:05"))
-                    }
-                }
-                break
-            }
-        }
-    }
-
-    return testRunContainers
-}
-
 // extractContainerFiles extracts database file and directories from headscale containers.
 func extractContainerFiles(ctx context.Context, cli *client.Client, containerID, containerName, logsDir string, verbose bool) error {
     // Extract database file
     if err := extractSingleFile(ctx, cli, containerID, "/tmp/integration_test_db.sqlite3", containerName+".db", logsDir, verbose); err != nil {
-        if verbose {
-            log.Printf("Warning: failed to extract database from %s: %v", containerName, err)
-        }
+        logExtractionError("database", containerName, err, verbose)
     }

     // Extract profile directory
     if err := extractDirectory(ctx, cli, containerID, "/tmp/profile", containerName+".pprof", logsDir, verbose); err != nil {
-        if verbose {
-            log.Printf("Warning: failed to extract profile from %s: %v", containerName, err)
-        }
+        logExtractionError("profile directory", containerName, err, verbose)
    }

     // Extract map responses directory
     if err := extractDirectory(ctx, cli, containerID, "/tmp/mapresponses", containerName+".mapresp", logsDir, verbose); err != nil {
-        if verbose {
-            log.Printf("Warning: failed to extract mapresponses from %s: %v", containerName, err)
-        }
+        logExtractionError("mapresponses directory", containerName, err, verbose)
     }

     return nil
 }

+// logExtractionError logs extraction errors with appropriate level based on error type.
+func logExtractionError(artifactType, containerName string, err error, verbose bool) {
+    if errors.Is(err, ErrFileNotFoundInTar) {
+        // File not found is expected and only logged in verbose mode
+        if verbose {
+            log.Printf("No %s found in container %s", artifactType, containerName)
+        }
+    } else {
+        // Other errors are actual failures and should be logged as warnings
+        log.Printf("Warning: failed to extract %s from %s: %v", artifactType, containerName, err)
+    }
+}
+
 // extractSingleFile copies a single file from a container.
 func extractSingleFile(ctx context.Context, cli *client.Client, containerID, sourcePath, fileName, logsDir string, verbose bool) error {
     tarReader, _, err := cli.CopyFromContainer(ctx, containerID, sourcePath)
@@ -2,6 +2,7 @@ package main

 import (
     "archive/tar"
+    "errors"
     "fmt"
     "io"
     "os"
@@ -9,6 +10,11 @@ import (
     "strings"
 )

+var (
+    // ErrFileNotFoundInTar indicates a file was not found in the tar archive.
+    ErrFileNotFoundInTar = errors.New("file not found in tar")
+)
+
 // extractFileFromTar extracts a single file from a tar reader.
 func extractFileFromTar(tarReader io.Reader, fileName, outputPath string) error {
     tr := tar.NewReader(tarReader)
@@ -41,7 +47,7 @@ func extractFileFromTar(tarReader io.Reader, fileName, outputPath string) error
         }
     }

-    return fmt.Errorf("file %s not found in tar", fileName)
+    return fmt.Errorf("%w: %s", ErrFileNotFoundInTar, fileName)
 }

 // extractDirectoryFromTar extracts all files from a tar reader to a target directory.
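Wrapping the sentinel with %w is what makes the expected/unexpected split in logExtractionError work: errors.Is matches the error class regardless of which file name is baked into the message. A standalone sketch of the pattern:

    package main

    import (
        "errors"
        "fmt"
    )

    var ErrFileNotFoundInTar = errors.New("file not found in tar")

    func find(name string) error {
        // %w preserves the sentinel in the error chain.
        return fmt.Errorf("%w: %s", ErrFileNotFoundInTar, name)
    }

    func main() {
        err := find("integration_test_db.sqlite3")
        // true: callers can treat this class as expected and log it quietly.
        fmt.Println(errors.Is(err, ErrFileNotFoundInTar))
    }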
@@ -1,8 +1,12 @@
 package dockertestutil

 import (
+    "fmt"
     "os"
+    "strings"
+    "time"

+    "github.com/juanfont/headscale/hscontrol/util"
     "github.com/ory/dockertest/v3"
 )
@@ -18,8 +22,7 @@ func GetIntegrationRunID() string {
 func DockerAddIntegrationLabels(opts *dockertest.RunOptions, testType string) {
     runID := GetIntegrationRunID()
     if runID == "" {
-        // If no run ID is set, do nothing for backward compatibility
-        return
+        panic("HEADSCALE_INTEGRATION_RUN_ID environment variable is required")
     }

     if opts.Labels == nil {
@@ -29,6 +32,29 @@ func DockerAddIntegrationLabels(opts *dockertest.RunOptions, testType string) {
     opts.Labels["hi.test-type"] = testType
 }

+// GenerateRunID creates a unique run identifier with timestamp and random hash.
+// Format: YYYYMMDD-HHMMSS-HASH (e.g., 20250619-143052-a1b2c3)
+func GenerateRunID() string {
+    now := time.Now()
+    timestamp := now.Format("20060102-150405")
+
+    // Add a short random hash to ensure uniqueness
+    randomHash := util.MustGenerateRandomStringDNSSafe(6)
+    return fmt.Sprintf("%s-%s", timestamp, randomHash)
+}
+
+// ExtractRunIDFromContainerName extracts the run ID from container name.
+// Expects format: "prefix-YYYYMMDD-HHMMSS-HASH"
+func ExtractRunIDFromContainerName(containerName string) string {
+    parts := strings.Split(containerName, "-")
+    if len(parts) >= 3 {
+        // Return the last three parts as the run ID (YYYYMMDD-HHMMSS-HASH)
+        return strings.Join(parts[len(parts)-3:], "-")
+    }
+
+    panic(fmt.Sprintf("unexpected container name format: %s", containerName))
+}
+
 // IsRunningInContainer checks if the current process is running inside a Docker container.
 // This is used by tests to determine if they should run integration tests.
 func IsRunningInContainer() bool {
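A quick round trip of the two new helpers, assuming the import path from this diff: GenerateRunID yields YYYYMMDD-HHMMSS-HASH, and because ExtractRunIDFromContainerName rejoins the last three dash-separated parts, prefixing the ID with a container name is lossless:

    package main

    import (
        "fmt"

        "github.com/juanfont/headscale/integration/dockertestutil"
    )

    func main() {
        runID := dockertestutil.GenerateRunID() // e.g. "20250619-143052-a1b2c3"
        containerName := "headscale-test-suite-" + runID

        // The last three parts are rejoined, so the original run ID comes back.
        fmt.Println(dockertestutil.ExtractRunIDFromContainerName(containerName) == runID)
    }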
@@ -280,6 +280,9 @@ func New(
         return nil, err
     }

+    // Add integration test labels if running under hi tool
+    dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
+
     var container *dockertest.Resource

     if version != VersionHead {
@@ -311,9 +314,6 @@
         )
     }

-    // Add integration test labels if running under hi tool
-    dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
-
     container, err = pool.BuildAndRunWithBuildOptions(
         buildOptions,
         tailscaleOptions,
@@ -325,9 +325,6 @@
         tailscaleOptions.Repository = "tailscale/tailscale"
         tailscaleOptions.Tag = version

-        // Add integration test labels if running under hi tool
-        dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
-
         container, err = pool.RunWithOptions(
             tailscaleOptions,
             dockertestutil.DockerRestartPolicy,
@@ -338,9 +335,6 @@
         tailscaleOptions.Repository = "tailscale/tailscale"
         tailscaleOptions.Tag = "v" + version

-        // Add integration test labels if running under hi tool
-        dockertestutil.DockerAddIntegrationLabels(tailscaleOptions, "tailscale")
-
         container, err = pool.RunWithOptions(
             tailscaleOptions,
             dockertestutil.DockerRestartPolicy,