1
0
mirror of https://github.com/juanfont/headscale.git synced 2026-02-07 20:04:00 +01:00

integration: add regression tests for peer/subnet route installation

Add two integration tests to reproduce and bisect the route installation
regression introduced in tailscale/tailscale#18173 (commit c3b7f240):

TestPeerRouteInstallation: Verifies that peer /32 routes are installed
in kernel routing table 52 when using non-CGNAT IP prefixes. Tests
across multiple Tailscale versions including bisect commits around
c3b7f240 to pinpoint the regression. Confirmed results:
- before-c3b7f240 (e9d8276): PASS
- c3b7f240: FAIL (only 100.100.100.100 in table 52)
- after-c3b7f240 (5aeee1d): FAIL

TestSubnetRouteInstallation: Verifies that subnet routes (10.4.0.0/24)
and host routes (10.4.1.1/32) advertised by a peer are installed in
table 52 on accepting clients using standard CGNAT peer addressing with
--accept-routes. Both tests include bisect commits around c3b7f240. The
subnet route variant passes on all versions, confirming the regression
only affects non-CGNAT peer IPs without --accept-routes.

Updates #1941

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kristoffer Dalby 2026-02-03 10:23:28 +00:00
parent a65010d33b
commit df3a426eee

View File

@ -23,6 +23,7 @@ import (
"github.com/juanfont/headscale/integration/hsic"
"github.com/juanfont/headscale/integration/integrationutil"
"github.com/juanfont/headscale/integration/tsic"
"github.com/samber/lo"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
xmaps "golang.org/x/exp/maps"
@ -3068,3 +3069,482 @@ func TestSubnetRouteACLFiltering(t *testing.T) {
assertTracerouteViaIPWithCollect(c, tr, ip)
}, 60*time.Second, 200*time.Millisecond, "Verifying traceroute goes through router")
}
// TestPeerRouteInstallation verifies that peer /32 routes are installed in
// the kernel routing table (table 52) when Headscale uses non-CGNAT IP
// prefixes, and that peers can reach each other.
//
// This is a regression test for:
//   - tailscale/tailscale#18587: Client >= 1.94 does not install peer /32
//     routes when using Headscale control server
//   - juanfont/headscale#3043: Cannot ping any host
//
// Tailscale PR #18173 (commit c3b7f240) changed peerRoutes() to gate
// non-CGNAT AllowedIPs on --accept-routes (RouteAll). Since Headscale
// users may configure non-CGNAT prefixes (e.g. 10.x.x.x), those peer
// /32 routes are no longer installed in table 52, breaking connectivity.
//
// Each subtest runs the same scenario against one Tailscale version (a
// released image, a tag built from source, or a bisect commit around
// c3b7f240) and walks the chain described in #18587:
//  1. Peers appear in tailscale status
//  2. Peers appear in tailscale debug netmap with correct AllowedIPs
//  3. Peer /32 routes are present in ip route show table 52
//  4. Peers can ping each other
//
// Between steps 2 and 3 the routing state of every client is dumped to the
// container log purely as a debugging aid; the dump asserts nothing.
func TestPeerRouteInstallation(t *testing.T) {
	IntegrationSkip(t)

	tests := []struct {
		name    string
		version string
		opts    []tsic.Option
	}{
		{
			name:    "1.92",
			version: "1.92",
		},
		{
			// v1.94.1 contains the regression from PR #18173.
			// No Docker image exists on Docker Hub, so we build from source.
			name:    "v1.94.1",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("v1.94.1")},
		},
		{
			// Parent of suspect commit c3b7f240 (PR #18173).
			// Should PASS if c3b7f240 is indeed the bad commit.
			name:    "before-c3b7f240",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("e9d82767e507108ed0f4eb0ff3b46a5625af7b0c")},
		},
		{
			// The suspect commit: "ipn,ipn/local: always accept routes
			// for Tailscale Services (cgnat range) (#18173)"
			// Should FAIL if this is the bad commit.
			name:    "c3b7f240",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("c3b7f2405155c39b563b85801724dc8855d1fbdb")},
		},
		{
			// First commit after c3b7f240 on main.
			// Should also FAIL if c3b7f240 introduced the regression.
			name:    "after-c3b7f240",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("5aeee1d8a576b29ddc6b6b0a8c3b526142fa9c9b")},
		},
		{
			name:    "head",
			version: "head",
		},
	}

	// Routing diagnostics are written to the container's PID 1 stderr so
	// they appear in extracted log files (docker exec stdout goes to the
	// test runner which isn't captured).
	const diagScript = `exec 3>/proc/1/fd/2
echo "=== DIAG: ip rule show ===" >&3; ip rule show >&3 2>&1
echo "=== DIAG: ip route show table 52 ===" >&3; ip route show table 52 >&3 2>&1
echo "=== DIAG: ip -6 route show table 52 ===" >&3; ip -6 route show table 52 >&3 2>&1
echo "=== DIAG: ip route show table all ===" >&3; ip route show table all >&3 2>&1`

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			spec := ScenarioSpec{
				NodesPerUser: 2,
				Users:        []string{"user"},
				Versions:     []string{tt.version},
			}

			scenario, err := NewScenario(spec)
			require.NoErrorf(t, err, "failed to create scenario: %s", err)
			defer scenario.ShutdownAssertNoPanics(t)

			err = scenario.CreateHeadscaleEnv(
				tt.opts,
				hsic.WithTestName("peerroute"),
				hsic.WithEmbeddedDERPServerOnly(),
				hsic.WithTLS(),
				// Use a non-CGNAT prefix to reproduce the regression.
				// The bug only manifests with IPs outside 100.64.0.0/10 because
				// peerRoutes() in Tailscale >= 1.94 unconditionally routes CGNAT
				// single IPs but gates everything else on --accept-routes.
				hsic.WithConfigEnv(map[string]string{
					"HEADSCALE_PREFIXES_V4": "10.64.0.0/10",
				}),
			)
			requireNoErrHeadscaleEnv(t, err)

			allClients, err := scenario.ListTailscaleClients()
			requireNoErrListClients(t, err)

			allIps, err := scenario.ListTailscaleClientsIPs()
			requireNoErrListClientIPs(t, err)

			err = scenario.WaitForTailscaleSync()
			requireNoErrSync(t, err)

			allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
				return x.String()
			})

			// peersOf returns the IPv4 addresses of every client except
			// the one owning self.
			peersOf := func(self netip.Addr) []netip.Addr {
				return lo.Filter(allIps, func(ip netip.Addr, _ int) bool {
					return ip.Is4() && ip != self
				})
			}

			// Log the Tailscale version and assigned IPs for debugging.
			for _, client := range allClients {
				ver, _, err := client.Execute([]string{"tailscale", "version"})
				if err != nil {
					// Purely informational; do not fail the test over it.
					t.Logf("client %s: failed to query tailscale version: %s",
						client.Hostname(), err)

					continue
				}

				firstLine, _, _ := strings.Cut(ver, "\n")
				t.Logf("client %s running: %s",
					client.Hostname(),
					strings.TrimSpace(firstLine))
			}

			t.Logf("all IPs: %v", allAddrs)

			// Step 1: Verify peers appear in tailscale status with
			// IPs assigned (as described in #18587: "peers visible in
			// tailscale status").
			for _, client := range allClients {
				assert.EventuallyWithT(t, func(c *assert.CollectT) {
					status, err := client.Status()
					// Stop this attempt on error: status must not be
					// dereferenced, and a panic in the condition would
					// abort the test run instead of being retried.
					if !assert.NoError(c, err) {
						return
					}

					if !assert.NotNil(c, status.Self,
						"client %s should report its own status", client.Hostname()) {
						return
					}

					assert.True(c, status.Self.Online,
						"client %s should be online", client.Hostname())
					assert.NotEmpty(c, status.Peer,
						"client %s should have peers", client.Hostname())

					for peerKey, peerStatus := range status.Peer {
						assert.True(c, peerStatus.Online,
							"peer %s should be online from %s perspective",
							peerKey.ShortString(), client.Hostname())
						// Verify peer has TailscaleIPs assigned from our
						// non-CGNAT prefix.
						assert.NotEmpty(c, peerStatus.TailscaleIPs,
							"peer %s should have TailscaleIPs",
							peerStatus.HostName)
					}
				}, 10*time.Second, 1*time.Second,
					"peers should be visible in tailscale status for %s",
					client.Hostname())
			}

			// Step 2: Verify netmap contains peers with AllowedIPs
			// including the non-CGNAT /32 addresses (as described in #18587:
			// "peers present in tailscale debug netmap, AllowedIPs include
			// peer /32").
			for _, client := range allClients {
				clientIPv4 := client.MustIPv4()
				peerIPs := peersOf(clientIPv4)

				assert.EventuallyWithT(t, func(c *assert.CollectT) {
					nm, err := client.Netmap()
					// Same reasoning as above: never touch nm after a
					// failed fetch.
					if !assert.NoError(c, err) {
						return
					}

					for _, peerIP := range peerIPs {
						peerPrefix := netip.PrefixFrom(peerIP, peerIP.BitLen())
						found := false

						for _, peer := range nm.Peers {
							aips := peer.AllowedIPs()
							for i := range aips.Len() {
								if aips.At(i) == peerPrefix {
									found = true
									break
								}
							}

							if found {
								break
							}
						}

						assert.Truef(c, found,
							"client %s: peer IP %s/32 not found in any peer's AllowedIPs in netmap",
							clientIPv4, peerIP)
					}
				}, 10*time.Second, 1*time.Second,
					"netmap should contain peer AllowedIPs for %s",
					clientIPv4)
			}

			// Diagnostics dump: best effort only. Failures are logged but
			// never fail the test, since the dump exists solely to make a
			// later assertion failure easier to investigate.
			for _, client := range allClients {
				if _, _, err := client.Execute([]string{"sh", "-c", diagScript}); err != nil {
					t.Logf("client %s: routing diagnostics failed: %s",
						client.Hostname(), err)
				}

				for _, peerIP := range peersOf(client.MustIPv4()) {
					_, _, err := client.Execute([]string{
						"sh", "-c",
						fmt.Sprintf(
							"echo '=== DIAG: ip route get %s ===' >/proc/1/fd/2; ip route get %s >/proc/1/fd/2 2>&1",
							peerIP, peerIP),
					})
					if err != nil {
						t.Logf("client %s: ip route get %s diagnostics failed: %s",
							client.Hostname(), peerIP, err)
					}
				}
			}

			// Step 3: Each client has /32 routes for its peers in
			// table 52. This is the primary regression check — in Tailscale
			// >= 1.94, non-CGNAT peer /32 routes are missing from table 52
			// because peerRoutes() gates them on --accept-routes.
			for _, client := range allClients {
				clientIPv4 := client.MustIPv4()
				peerIPs := peersOf(clientIPv4)

				assert.EventuallyWithT(t, func(c *assert.CollectT) {
					stdout, _, err := client.Execute([]string{
						"ip", "route", "show", "table", "52",
					})
					if !assert.NoErrorf(c, err,
						"failed to get routing table for client %s", clientIPv4) {
						return
					}

					for _, peerIP := range peerIPs {
						// The trailing space keeps 10.64.0.1 from matching
						// a longer address such as 10.64.0.10.
						route := peerIP.String() + " "
						assert.Containsf(c, stdout, route,
							"client %s missing peer route for %s in table 52.\n"+
								"Routing table contents:\n%s",
							clientIPv4, peerIP, stdout,
						)
					}
				}, 15*time.Second, 1*time.Second,
					"peer /32 routes should be installed in table 52 for client %s",
					clientIPv4)
			}

			// Step 4: Peers can ping each other. Skip self-pings
			// because tailscale ping to own non-CGNAT IP fails with
			// "no matching peer" (IsTailscaleIP doesn't recognize it).
			for _, client := range allClients {
				for _, addr := range allAddrs {
					if isSelfClient(client, addr) {
						continue
					}

					err := client.Ping(addr)
					require.NoErrorf(t, err,
						"failed to ping %s from %s", addr, client.Hostname())
				}
			}
		})
	}
}
// TestSubnetRouteInstallation verifies that subnet routes advertised by a
// peer are installed into kernel routing table 52 on clients that run with
// --accept-routes.
//
// It accompanies TestPeerRouteInstallation as a regression test for
// tailscale/tailscale#18587 and juanfont/headscale#3043: Tailscale PR #18173
// (commit c3b7f240) changed peerRoutes() to gate non-CGNAT AllowedIPs on
// routeAll (--accept-routes). This test covers the case where
// --accept-routes is set, in which the advertised subnet routes must still
// be installed.
//
// Unlike TestPeerRouteInstallation (which uses non-CGNAT peer addressing),
// this test uses the standard CGNAT range for peer IPs and advertises
// routes outside CGNAT.
//
// Each subtest runs against one Tailscale version. The first client acts
// as subnet router and advertises a /24 and a /32 that the policy
// auto-approves; the remaining clients must see both routes in their peer
// status and in table 52, and all clients must be able to ping each other.
func TestSubnetRouteInstallation(t *testing.T) {
	IntegrationSkip(t)

	tests := []struct {
		name    string
		version string
		opts    []tsic.Option
	}{
		{
			name:    "1.92",
			version: "1.92",
		},
		{
			// Parent of suspect commit c3b7f240 (PR #18173).
			name:    "before-c3b7f240",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("e9d82767e507108ed0f4eb0ff3b46a5625af7b0c")},
		},
		{
			// The suspect commit: "ipn,ipn/local: always accept routes
			// for Tailscale Services (cgnat range) (#18173)"
			name:    "c3b7f240",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("c3b7f2405155c39b563b85801724dc8855d1fbdb")},
		},
		{
			// First commit after c3b7f240 on main.
			name:    "after-c3b7f240",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("5aeee1d8a576b29ddc6b6b0a8c3b526142fa9c9b")},
		},
		{
			// v1.94.1 contains the regression from PR #18173.
			// No Docker image exists on Docker Hub, so we build from source.
			name:    "v1.94.1",
			version: "head",
			opts:    []tsic.Option{tsic.WithTailscaleRef("v1.94.1")},
		},
		{
			name:    "head",
			version: "head",
		},
	}

	subnetRoute := netip.MustParsePrefix("10.4.0.0/24")
	hostRoute := netip.MustParsePrefix("10.4.1.1/32")
	autoApprovePrefix := netip.MustParsePrefix("10.4.0.0/16")

	// Routing diagnostics are written to the container's PID 1 stderr so
	// they appear in extracted log files.
	const diagScript = `exec 3>/proc/1/fd/2
echo "=== DIAG: ip rule show ===" >&3; ip rule show >&3 2>&1
echo "=== DIAG: ip route show table 52 ===" >&3; ip route show table 52 >&3 2>&1
echo "=== DIAG: ip -6 route show table 52 ===" >&3; ip -6 route show table 52 >&3 2>&1`

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			spec := ScenarioSpec{
				NodesPerUser: 3,
				Users:        []string{"user"},
				Versions:     []string{tt.version},
			}

			scenario, err := NewScenario(spec)
			require.NoErrorf(t, err, "failed to create scenario: %s", err)
			defer scenario.ShutdownAssertNoPanics(t)

			// Build a fresh slice instead of appending to tt.opts so the
			// table entry's backing array can never be written through.
			opts := make([]tsic.Option, 0, len(tt.opts)+1)
			opts = append(opts, tt.opts...)
			opts = append(opts, tsic.WithAcceptRoutes())

			err = scenario.CreateHeadscaleEnv(
				opts,
				hsic.WithTestName("subnetroute"),
				hsic.WithEmbeddedDERPServerOnly(),
				hsic.WithTLS(),
				hsic.WithACLPolicy(&policyv2.Policy{
					ACLs: []policyv2.ACL{
						{
							Action:  "accept",
							Sources: []policyv2.Alias{wildcard()},
							Destinations: []policyv2.AliasWithPorts{
								aliasWithPorts(wildcard(), tailcfg.PortRangeAny),
							},
						},
					},
					AutoApprovers: policyv2.AutoApproverPolicy{
						Routes: map[netip.Prefix]policyv2.AutoApprovers{
							autoApprovePrefix: {usernameApprover("user@")},
						},
					},
				}),
			)
			requireNoErrHeadscaleEnv(t, err)

			allClients, err := scenario.ListTailscaleClients()
			requireNoErrListClients(t, err)

			allIps, err := scenario.ListTailscaleClientsIPs()
			requireNoErrListClientIPs(t, err)

			err = scenario.WaitForTailscaleSync()
			requireNoErrSync(t, err)

			headscale, err := scenario.Headscale()
			requireNoErrGetHeadscale(t, err)

			// Log the Tailscale version for debugging.
			for _, client := range allClients {
				ver, _, err := client.Execute([]string{"tailscale", "version"})
				if err != nil {
					// Purely informational; do not fail the test over it.
					t.Logf("client %s: failed to query tailscale version: %s",
						client.Hostname(), err)

					continue
				}

				firstLine, _, _ := strings.Cut(ver, "\n")
				t.Logf("client %s running: %s",
					client.Hostname(),
					strings.TrimSpace(firstLine))
			}

			// Designate the first client as the subnet router; everyone
			// else is an accepting client.
			require.NotEmpty(t, allClients, "scenario should have created clients")

			subnetRouter := allClients[0]
			clients := allClients[1:]

			advertiseRoutes := subnetRoute.String() + "," + hostRoute.String()
			_, _, err = subnetRouter.Execute([]string{
				"tailscale", "set",
				"--advertise-routes=" + advertiseRoutes,
			})
			require.NoErrorf(t, err, "failed to advertise routes: %s", err)

			// Wait for route propagation: the router node should have
			// 2 available, 2 approved, 2 subnet routes.
			assert.EventuallyWithT(t, func(c *assert.CollectT) {
				nodes, err := headscale.ListNodes()
				if !assert.NoError(c, err) {
					return
				}

				// Track whether the router was seen at all; otherwise an
				// unmatched hostname would make this check pass without
				// asserting anything.
				found := false

				for _, node := range nodes {
					if node.GetName() == subnetRouter.Hostname() {
						found = true

						requireNodeRouteCountWithCollect(c, node, 2, 2, 2)
					}
				}

				assert.Truef(c, found,
					"subnet router %s not found in headscale node list",
					subnetRouter.Hostname())
			}, 30*time.Second, 500*time.Millisecond,
				"subnet router should have 2 available, 2 approved, 2 subnet routes")

			// Resolve the router's public key once, outside the retry
			// loops below, and fail cleanly if it cannot be fetched.
			routerStatus, err := subnetRouter.Status()
			require.NoErrorf(t, err,
				"failed to get status of subnet router %s", subnetRouter.Hostname())
			require.NotNil(t, routerStatus.Self,
				"subnet router %s should report its own status", subnetRouter.Hostname())

			routerKey := routerStatus.Self.PublicKey

			// Verify non-router clients see the subnet routes in peer status.
			expectedPrefixes := []netip.Prefix{subnetRoute, hostRoute}

			for _, client := range clients {
				assert.EventuallyWithT(t, func(c *assert.CollectT) {
					status, err := client.Status()
					// Stop this attempt on error: status must not be
					// dereferenced, and a panic in the condition would
					// abort the test run instead of being retried.
					if !assert.NoError(c, err) {
						return
					}

					peerStatus, ok := status.Peer[routerKey]
					assert.True(c, ok, "client %s should see subnet router as peer", client.Hostname())

					if ok {
						requirePeerSubnetRoutesWithCollect(c, peerStatus, expectedPrefixes)
					}
				}, 15*time.Second, 500*time.Millisecond,
					"client %s should see subnet routes from router", client.Hostname())
			}

			// Diagnostics dump: best effort only. Failures are logged but
			// never fail the test.
			for _, client := range clients {
				if _, _, err := client.Execute([]string{"sh", "-c", diagScript}); err != nil {
					t.Logf("client %s: routing diagnostics failed: %s",
						client.Hostname(), err)
				}
			}

			// Assert table 52 routes: each non-router client must have
			// the subnet routes installed in kernel routing table 52.
			// The /32 is matched by its bare address.
			wantSubnet := subnetRoute.String()
			wantHost := hostRoute.Addr().String()

			for _, client := range clients {
				assert.EventuallyWithT(t, func(c *assert.CollectT) {
					stdout, _, err := client.Execute([]string{
						"ip", "route", "show", "table", "52",
					})
					if !assert.NoErrorf(c, err,
						"failed to get routing table for client %s", client.Hostname()) {
						return
					}

					assert.Containsf(c, stdout, wantSubnet,
						"client %s missing subnet route %s in table 52.\n"+
							"Routing table contents:\n%s",
						client.Hostname(), subnetRoute, stdout)
					assert.Containsf(c, stdout, wantHost,
						"client %s missing host route %s in table 52.\n"+
							"Routing table contents:\n%s",
						client.Hostname(), hostRoute, stdout)
				}, 15*time.Second, 1*time.Second,
					"subnet routes should be installed in table 52 for client %s",
					client.Hostname())
			}

			// Peer connectivity: all clients can ping each other over CGNAT IPs.
			allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
				return x.String()
			})

			for _, client := range allClients {
				for _, addr := range allAddrs {
					if isSelfClient(client, addr) {
						continue
					}

					err := client.Ping(addr)
					require.NoErrorf(t, err,
						"failed to ping %s from %s", addr, client.Hostname())
				}
			}
		})
	}
}