From b87567628a88703884d2c95c0aba7b0c2f118538 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Fri, 22 Aug 2025 10:40:38 +0200 Subject: [PATCH 1/8] derp: increase update frequency and harden on failures (#2741) --- CHANGELOG.md | 15 +- config-example.yaml | 7 +- hscontrol/app.go | 45 +++-- hscontrol/derp/derp.go | 83 +++++--- hscontrol/derp/derp_test.go | 284 +++++++++++++++++++++++++++ hscontrol/derp/server/derp_server.go | 12 +- hscontrol/mapper/batcher_test.go | 7 + hscontrol/mapper/builder.go | 2 +- hscontrol/state/state.go | 28 +-- hscontrol/types/config.go | 1 + 10 files changed, 417 insertions(+), 67 deletions(-) create mode 100644 hscontrol/derp/derp_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index e3957b80..e77eb3e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Please read the [PR description](https://github.com/juanfont/headscale/pull/2617 for more technical details about the issues and solutions. **SQLite Database Backup Example:** + ```bash # Stop headscale systemctl stop headscale @@ -41,6 +42,13 @@ cp /var/lib/headscale/db.sqlite-shm /var/lib/headscale/db.sqlite-shm.backup systemctl start headscale ``` +### DERPMap update frequency + +The default DERPMap update frequency has been changed from 24 hours to 3 hours. +If you set the `derp.update_frequency` configuration option, it is recommended to change +it to `3h` to ensure that the headscale instance gets the latest DERPMap updates when +upstream is changed. + ### BREAKING - Remove support for 32-bit binaries @@ -55,6 +63,11 @@ systemctl start headscale - **IMPORTANT: Backup your SQLite database before upgrading** - Introduces safer table renaming migration strategy - Addresses longstanding database integrity issues +- DERPmap update frequency default changed from 24h to 3h + [#2741](https://github.com/juanfont/headscale/pull/2741) +- DERPmap update mechanism has been improved with retry, + and is now failing conservatively, preserving the old map upon failure. 
+ [#2741](https://github.com/juanfont/headscale/pull/2741) - Add support for `autogroup:member`, `autogroup:tagged` [#2572](https://github.com/juanfont/headscale/pull/2572) - Remove policy v1 code [#2600](https://github.com/juanfont/headscale/pull/2600) @@ -72,7 +85,7 @@ systemctl start headscale [#2643](https://github.com/juanfont/headscale/pull/2643) - OIDC: Use group claim from UserInfo [#2663](https://github.com/juanfont/headscale/pull/2663) -- OIDC: Update user with claims from UserInfo *before* comparing with allowed +- OIDC: Update user with claims from UserInfo _before_ comparing with allowed groups, email and domain [#2663](https://github.com/juanfont/headscale/pull/2663) ## 0.26.1 (2025-06-06) diff --git a/config-example.yaml b/config-example.yaml index 8748b560..e476f9fd 100644 --- a/config-example.yaml +++ b/config-example.yaml @@ -128,7 +128,7 @@ derp: auto_update_enabled: true # How often should we check for DERP updates? - update_frequency: 24h + update_frequency: 3h # Disables the automatic check for headscale updates on startup disable_check_updates: false @@ -275,7 +275,7 @@ dns: # `hostname.base_domain` (e.g., _myhost.example.com_). base_domain: example.com - # Whether to use the local DNS settings of a node or override the local DNS + # Whether to use the local DNS settings of a node or override the local DNS # settings (default) and force the use of Headscale's DNS configuration. override_local_dns: true @@ -293,8 +293,7 @@ dns: # Split DNS (see https://tailscale.com/kb/1054/dns/), # a map of domains and which DNS server to use for each. 
- split: - {} + split: {} # foo.bar.com: # - 1.1.1.1 # darp.headscale.net: diff --git a/hscontrol/app.go b/hscontrol/app.go index ec8e2550..774aec46 100644 --- a/hscontrol/app.go +++ b/hscontrol/app.go @@ -17,6 +17,7 @@ import ( "syscall" "time" + "github.com/cenkalti/backoff/v5" "github.com/davecgh/go-spew/spew" "github.com/gorilla/mux" grpcRuntime "github.com/grpc-ecosystem/grpc-gateway/v2/runtime" @@ -284,11 +285,23 @@ func (h *Headscale) scheduledTasks(ctx context.Context) { case <-derpTickerChan: log.Info().Msg("Fetching DERPMap updates") - derpMap := derp.GetDERPMap(h.cfg.DERP) - if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion { - region, _ := h.DERPServer.GenerateRegion() - derpMap.Regions[region.RegionID] = &region + derpMap, err := backoff.Retry(ctx, func() (*tailcfg.DERPMap, error) { + derpMap, err := derp.GetDERPMap(h.cfg.DERP) + if err != nil { + return nil, err + } + if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion { + region, _ := h.DERPServer.GenerateRegion() + derpMap.Regions[region.RegionID] = &region + } + + return derpMap, nil + }, backoff.WithBackOff(backoff.NewExponentialBackOff())) + if err != nil { + log.Error().Err(err).Msg("failed to build new DERPMap, retrying later") + continue } + h.state.SetDERPMap(derpMap) h.Change(change.DERPSet) @@ -516,29 +529,31 @@ func (h *Headscale) Serve() error { h.mapBatcher.Start() defer h.mapBatcher.Close() - // TODO(kradalby): fix state part. 
if h.cfg.DERP.ServerEnabled { // When embedded DERP is enabled we always need a STUN server if h.cfg.DERP.STUNAddr == "" { return errSTUNAddressNotSet } - region, err := h.DERPServer.GenerateRegion() - if err != nil { - return fmt.Errorf("generating DERP region for embedded server: %w", err) - } - - if h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion { - h.state.DERPMap().Regions[region.RegionID] = &region - } - go h.DERPServer.ServeSTUN() } - if len(h.state.DERPMap().Regions) == 0 { + derpMap, err := derp.GetDERPMap(h.cfg.DERP) + if err != nil { + return fmt.Errorf("failed to get DERPMap: %w", err) + } + + if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion { + region, _ := h.DERPServer.GenerateRegion() + derpMap.Regions[region.RegionID] = &region + } + + if len(derpMap.Regions) == 0 { return errEmptyInitialDERPMap } + h.state.SetDERPMap(derpMap) + // Start ephemeral node garbage collector and schedule all nodes // that are already in the database and ephemeral. If they are still // around between restarts, they will reconnect and the GC will diff --git a/hscontrol/derp/derp.go b/hscontrol/derp/derp.go index 1ed619ec..b3e2475d 100644 --- a/hscontrol/derp/derp.go +++ b/hscontrol/derp/derp.go @@ -1,16 +1,22 @@ package derp import ( + "cmp" "context" "encoding/json" + "hash/crc64" "io" "maps" + "math/rand" "net/http" "net/url" "os" + "reflect" + "sync" + "time" "github.com/juanfont/headscale/hscontrol/types" - "github.com/rs/zerolog/log" + "github.com/spf13/viper" "gopkg.in/yaml.v3" "tailscale.com/tailcfg" ) @@ -79,26 +85,16 @@ func mergeDERPMaps(derpMaps []*tailcfg.DERPMap) *tailcfg.DERPMap { return &result } -func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap { +func GetDERPMap(cfg types.DERPConfig) (*tailcfg.DERPMap, error) { var derpMaps []*tailcfg.DERPMap if cfg.DERPMap != nil { derpMaps = append(derpMaps, cfg.DERPMap) } for _, path := range cfg.Paths { - log.Debug(). - Str("func", "GetDERPMap"). - Str("path", path). 
- Msg("Loading DERPMap from path") derpMap, err := loadDERPMapFromPath(path) if err != nil { - log.Error(). - Str("func", "GetDERPMap"). - Str("path", path). - Err(err). - Msg("Could not load DERP map from path") - - break + return nil, err } derpMaps = append(derpMaps, derpMap) @@ -106,26 +102,59 @@ func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap { for _, addr := range cfg.URLs { derpMap, err := loadDERPMapFromURL(addr) - log.Debug(). - Str("func", "GetDERPMap"). - Str("url", addr.String()). - Msg("Loading DERPMap from path") if err != nil { - log.Error(). - Str("func", "GetDERPMap"). - Str("url", addr.String()). - Err(err). - Msg("Could not load DERP map from path") - - break + return nil, err } derpMaps = append(derpMaps, derpMap) } derpMap := mergeDERPMaps(derpMaps) + shuffleDERPMap(derpMap) - log.Trace().Interface("derpMap", derpMap).Msg("DERPMap loaded") - - return derpMap + return derpMap, nil +} + +func shuffleDERPMap(dm *tailcfg.DERPMap) { + if dm == nil || len(dm.Regions) == 0 { + return + } + + for id, region := range dm.Regions { + if len(region.Nodes) == 0 { + continue + } + + dm.Regions[id] = shuffleRegionNoClone(region) + } +} + +var crc64Table = crc64.MakeTable(crc64.ISO) + +var ( + derpRandomOnce sync.Once + derpRandomInst *rand.Rand + derpRandomMu sync.RWMutex +) + +func derpRandom() *rand.Rand { + derpRandomOnce.Do(func() { + seed := cmp.Or(viper.GetString("dns.base_domain"), time.Now().String()) + rnd := rand.New(rand.NewSource(0)) + rnd.Seed(int64(crc64.Checksum([]byte(seed), crc64Table))) + derpRandomInst = rnd + }) + return derpRandomInst +} + +func resetDerpRandomForTesting() { + derpRandomMu.Lock() + defer derpRandomMu.Unlock() + derpRandomOnce = sync.Once{} + derpRandomInst = nil +} + +func shuffleRegionNoClone(r *tailcfg.DERPRegion) *tailcfg.DERPRegion { + derpRandom().Shuffle(len(r.Nodes), reflect.Swapper(r.Nodes)) + return r } diff --git a/hscontrol/derp/derp_test.go b/hscontrol/derp/derp_test.go new file mode 100644 index 
00000000..2e8ace91 --- /dev/null +++ b/hscontrol/derp/derp_test.go @@ -0,0 +1,284 @@ +package derp + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/spf13/viper" + "tailscale.com/tailcfg" +) + +func TestShuffleDERPMapDeterministic(t *testing.T) { + tests := []struct { + name string + baseDomain string + derpMap *tailcfg.DERPMap + expected *tailcfg.DERPMap + }{ + { + name: "single region with 4 nodes", + baseDomain: "test1.example.com", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "nyc", + RegionName: "New York City", + Nodes: []*tailcfg.DERPNode{ + {Name: "1f", RegionID: 1, HostName: "derp1f.tailscale.com"}, + {Name: "1g", RegionID: 1, HostName: "derp1g.tailscale.com"}, + {Name: "1h", RegionID: 1, HostName: "derp1h.tailscale.com"}, + {Name: "1i", RegionID: 1, HostName: "derp1i.tailscale.com"}, + }, + }, + }, + }, + expected: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "nyc", + RegionName: "New York City", + Nodes: []*tailcfg.DERPNode{ + {Name: "1g", RegionID: 1, HostName: "derp1g.tailscale.com"}, + {Name: "1f", RegionID: 1, HostName: "derp1f.tailscale.com"}, + {Name: "1i", RegionID: 1, HostName: "derp1i.tailscale.com"}, + {Name: "1h", RegionID: 1, HostName: "derp1h.tailscale.com"}, + }, + }, + }, + }, + }, + { + name: "multiple regions with nodes", + baseDomain: "test2.example.com", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 10: { + RegionID: 10, + RegionCode: "sea", + RegionName: "Seattle", + Nodes: []*tailcfg.DERPNode{ + {Name: "10b", RegionID: 10, HostName: "derp10b.tailscale.com"}, + {Name: "10c", RegionID: 10, HostName: "derp10c.tailscale.com"}, + {Name: "10d", RegionID: 10, HostName: "derp10d.tailscale.com"}, + }, + }, + 2: { + RegionID: 2, + RegionCode: "sfo", + RegionName: "San Francisco", + Nodes: []*tailcfg.DERPNode{ + {Name: "2d", RegionID: 2, HostName: "derp2d.tailscale.com"}, + {Name: "2e", 
RegionID: 2, HostName: "derp2e.tailscale.com"}, + {Name: "2f", RegionID: 2, HostName: "derp2f.tailscale.com"}, + }, + }, + }, + }, + expected: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 10: { + RegionID: 10, + RegionCode: "sea", + RegionName: "Seattle", + Nodes: []*tailcfg.DERPNode{ + {Name: "10b", RegionID: 10, HostName: "derp10b.tailscale.com"}, + {Name: "10c", RegionID: 10, HostName: "derp10c.tailscale.com"}, + {Name: "10d", RegionID: 10, HostName: "derp10d.tailscale.com"}, + }, + }, + 2: { + RegionID: 2, + RegionCode: "sfo", + RegionName: "San Francisco", + Nodes: []*tailcfg.DERPNode{ + {Name: "2f", RegionID: 2, HostName: "derp2f.tailscale.com"}, + {Name: "2e", RegionID: 2, HostName: "derp2e.tailscale.com"}, + {Name: "2d", RegionID: 2, HostName: "derp2d.tailscale.com"}, + }, + }, + }, + }, + }, + { + name: "large region with many nodes", + baseDomain: "test3.example.com", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 4: { + RegionID: 4, + RegionCode: "fra", + RegionName: "Frankfurt", + Nodes: []*tailcfg.DERPNode{ + {Name: "4f", RegionID: 4, HostName: "derp4f.tailscale.com"}, + {Name: "4g", RegionID: 4, HostName: "derp4g.tailscale.com"}, + {Name: "4h", RegionID: 4, HostName: "derp4h.tailscale.com"}, + {Name: "4i", RegionID: 4, HostName: "derp4i.tailscale.com"}, + }, + }, + }, + }, + expected: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 4: { + RegionID: 4, + RegionCode: "fra", + RegionName: "Frankfurt", + Nodes: []*tailcfg.DERPNode{ + {Name: "4f", RegionID: 4, HostName: "derp4f.tailscale.com"}, + {Name: "4h", RegionID: 4, HostName: "derp4h.tailscale.com"}, + {Name: "4g", RegionID: 4, HostName: "derp4g.tailscale.com"}, + {Name: "4i", RegionID: 4, HostName: "derp4i.tailscale.com"}, + }, + }, + }, + }, + }, + { + name: "same region different base domain", + baseDomain: "different.example.com", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 4: { + RegionID: 4, + RegionCode: "fra", + 
RegionName: "Frankfurt", + Nodes: []*tailcfg.DERPNode{ + {Name: "4f", RegionID: 4, HostName: "derp4f.tailscale.com"}, + {Name: "4g", RegionID: 4, HostName: "derp4g.tailscale.com"}, + {Name: "4h", RegionID: 4, HostName: "derp4h.tailscale.com"}, + {Name: "4i", RegionID: 4, HostName: "derp4i.tailscale.com"}, + }, + }, + }, + }, + expected: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 4: { + RegionID: 4, + RegionCode: "fra", + RegionName: "Frankfurt", + Nodes: []*tailcfg.DERPNode{ + {Name: "4g", RegionID: 4, HostName: "derp4g.tailscale.com"}, + {Name: "4i", RegionID: 4, HostName: "derp4i.tailscale.com"}, + {Name: "4f", RegionID: 4, HostName: "derp4f.tailscale.com"}, + {Name: "4h", RegionID: 4, HostName: "derp4h.tailscale.com"}, + }, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + viper.Set("dns.base_domain", tt.baseDomain) + defer viper.Reset() + resetDerpRandomForTesting() + + testMap := tt.derpMap.View().AsStruct() + shuffleDERPMap(testMap) + + if diff := cmp.Diff(tt.expected, testMap); diff != "" { + t.Errorf("Shuffled DERP map doesn't match expected (-expected +actual):\n%s", diff) + } + }) + } + +} + +func TestShuffleDERPMapEdgeCases(t *testing.T) { + tests := []struct { + name string + derpMap *tailcfg.DERPMap + }{ + { + name: "nil derp map", + derpMap: nil, + }, + { + name: "empty derp map", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{}, + }, + }, + { + name: "region with no nodes", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "empty", + RegionName: "Empty Region", + Nodes: []*tailcfg.DERPNode{}, + }, + }, + }, + }, + { + name: "region with single node", + derpMap: &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "single", + RegionName: "Single Node Region", + Nodes: []*tailcfg.DERPNode{ + {Name: "1a", RegionID: 1, HostName: "derp1a.tailscale.com"}, + }, + }, + }, + }, + }, 
+ } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + shuffleDERPMap(tt.derpMap) + }) + } +} + + +func TestShuffleDERPMapWithoutBaseDomain(t *testing.T) { + viper.Reset() + resetDerpRandomForTesting() + + derpMap := &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "test", + RegionName: "Test Region", + Nodes: []*tailcfg.DERPNode{ + {Name: "1a", RegionID: 1, HostName: "derp1a.test.com"}, + {Name: "1b", RegionID: 1, HostName: "derp1b.test.com"}, + {Name: "1c", RegionID: 1, HostName: "derp1c.test.com"}, + {Name: "1d", RegionID: 1, HostName: "derp1d.test.com"}, + }, + }, + }, + } + + original := derpMap.View().AsStruct() + shuffleDERPMap(derpMap) + + if len(derpMap.Regions) != 1 || len(derpMap.Regions[1].Nodes) != 4 { + t.Error("Shuffle corrupted DERP map structure") + } + + originalNodes := make(map[string]bool) + for _, node := range original.Regions[1].Nodes { + originalNodes[node.Name] = true + } + + shuffledNodes := make(map[string]bool) + for _, node := range derpMap.Regions[1].Nodes { + shuffledNodes[node.Name] = true + } + + if diff := cmp.Diff(originalNodes, shuffledNodes); diff != "" { + t.Errorf("Shuffle changed node set (-original +shuffled):\n%s", diff) + } +} diff --git a/hscontrol/derp/server/derp_server.go b/hscontrol/derp/server/derp_server.go index fee395f1..b8f892be 100644 --- a/hscontrol/derp/server/derp_server.go +++ b/hscontrol/derp/server/derp_server.go @@ -276,7 +276,7 @@ func DERPProbeHandler( // An example implementation is found here https://derp.tailscale.com/bootstrap-dns // Coordination server is included automatically, since local DERP is using the same DNS Name in d.serverURL. 
func DERPBootstrapDNSHandler( - derpMap *tailcfg.DERPMap, + derpMap tailcfg.DERPMapView, ) func(http.ResponseWriter, *http.Request) { return func( writer http.ResponseWriter, @@ -287,18 +287,18 @@ func DERPBootstrapDNSHandler( resolvCtx, cancel := context.WithTimeout(req.Context(), time.Minute) defer cancel() var resolver net.Resolver - for _, region := range derpMap.Regions { - for _, node := range region.Nodes { // we don't care if we override some nodes - addrs, err := resolver.LookupIP(resolvCtx, "ip", node.HostName) + for _, region := range derpMap.Regions().All() { + for _, node := range region.Nodes().All() { // we don't care if we override some nodes + addrs, err := resolver.LookupIP(resolvCtx, "ip", node.HostName()) if err != nil { log.Trace(). Caller(). Err(err). - Msgf("bootstrap DNS lookup failed %q", node.HostName) + Msgf("bootstrap DNS lookup failed %q", node.HostName()) continue } - dnsEntries[node.HostName] = addrs + dnsEntries[node.HostName()] = addrs } } writer.Header().Set("Content-Type", "application/json") diff --git a/hscontrol/mapper/batcher_test.go b/hscontrol/mapper/batcher_test.go index b2a632d4..8ea72876 100644 --- a/hscontrol/mapper/batcher_test.go +++ b/hscontrol/mapper/batcher_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/juanfont/headscale/hscontrol/db" + "github.com/juanfont/headscale/hscontrol/derp" "github.com/juanfont/headscale/hscontrol/state" "github.com/juanfont/headscale/hscontrol/types" "github.com/juanfont/headscale/hscontrol/types/change" @@ -167,6 +168,12 @@ func setupBatcherWithTestData(t *testing.T, bf batcherFunc, userCount, nodesPerU t.Fatalf("Failed to create state: %v", err) } + derpMap, err := derp.GetDERPMap(cfg.DERP) + assert.NoError(t, err) + assert.NotNil(t, derpMap) + + state.SetDERPMap(derpMap) + // Set up a permissive policy that allows all communication for testing allowAllPolicy := `{ "acls": [ diff --git a/hscontrol/mapper/builder.go b/hscontrol/mapper/builder.go index b6102c01..111724bc 100644 --- 
a/hscontrol/mapper/builder.go +++ b/hscontrol/mapper/builder.go @@ -79,7 +79,7 @@ func (b *MapResponseBuilder) WithSelfNode() *MapResponseBuilder { // WithDERPMap adds the DERP map to the response func (b *MapResponseBuilder) WithDERPMap() *MapResponseBuilder { - b.resp.DERPMap = b.mapper.state.DERPMap() + b.resp.DERPMap = b.mapper.state.DERPMap().AsStruct() return b } diff --git a/hscontrol/state/state.go b/hscontrol/state/state.go index 02d5d3cd..0a743184 100644 --- a/hscontrol/state/state.go +++ b/hscontrol/state/state.go @@ -9,10 +9,10 @@ import ( "io" "net/netip" "os" + "sync/atomic" "time" hsdb "github.com/juanfont/headscale/hscontrol/db" - "github.com/juanfont/headscale/hscontrol/derp" "github.com/juanfont/headscale/hscontrol/policy" "github.com/juanfont/headscale/hscontrol/policy/matcher" "github.com/juanfont/headscale/hscontrol/routes" @@ -55,7 +55,7 @@ type State struct { // ipAlloc manages IP address allocation for nodes ipAlloc *hsdb.IPAllocator // derpMap contains the current DERP relay configuration - derpMap *tailcfg.DERPMap + derpMap atomic.Pointer[tailcfg.DERPMap] // polMan handles policy evaluation and management polMan policy.PolicyManager // registrationCache caches node registration data to reduce database load @@ -86,8 +86,6 @@ func NewState(cfg *types.Config) (*State, error) { return nil, fmt.Errorf("init ip allocatior: %w", err) } - derpMap := derp.GetDERPMap(cfg.DERP) - nodes, err := db.ListNodes() if err != nil { return nil, fmt.Errorf("loading nodes: %w", err) @@ -107,17 +105,17 @@ func NewState(cfg *types.Config) (*State, error) { return nil, fmt.Errorf("init policy manager: %w", err) } - return &State{ + s := &State{ cfg: cfg, - db: db, - ipAlloc: ipAlloc, - // TODO(kradalby): Update DERPMap - derpMap: derpMap, + db: db, + ipAlloc: ipAlloc, polMan: polMan, registrationCache: registrationCache, primaryRoutes: routes.New(), - }, nil + } + + return s, nil } // Close gracefully shuts down the State instance and releases all resources. 
@@ -170,9 +168,14 @@ func policyBytes(db *hsdb.HSDatabase, cfg *types.Config) ([]byte, error) { return nil, fmt.Errorf("%w: %s", ErrUnsupportedPolicyMode, cfg.Policy.Mode) } +// SetDERPMap updates the DERP relay configuration. +func (s *State) SetDERPMap(dm *tailcfg.DERPMap) { + s.derpMap.Store(dm) +} + // DERPMap returns the current DERP relay configuration for peer-to-peer connectivity. -func (s *State) DERPMap() *tailcfg.DERPMap { - return s.derpMap +func (s *State) DERPMap() tailcfg.DERPMapView { + return s.derpMap.Load().View() } // ReloadPolicy reloads the access control policy and triggers auto-approval if changed. @@ -209,7 +212,6 @@ func (s *State) CreateUser(user types.User) (*types.User, bool, error) { s.mu.Lock() defer s.mu.Unlock() - if err := s.db.DB.Save(&user).Error; err != nil { return nil, false, fmt.Errorf("creating user: %w", err) } diff --git a/hscontrol/types/config.go b/hscontrol/types/config.go index be0bce81..f23b75e8 100644 --- a/hscontrol/types/config.go +++ b/hscontrol/types/config.go @@ -300,6 +300,7 @@ func LoadConfig(path string, isFile bool) error { viper.SetDefault("derp.server.verify_clients", true) viper.SetDefault("derp.server.stun.enabled", true) viper.SetDefault("derp.server.automatically_add_embedded_derp_region", true) + viper.SetDefault("derp.update_frequency", "3h") viper.SetDefault("unix_socket", "/var/run/headscale/headscale.sock") viper.SetDefault("unix_socket_permission", "0o770") From 4d61da30d0e5909dd1410f3927455000e1a53738 Mon Sep 17 00:00:00 2001 From: Florian Preinstorfer Date: Fri, 15 Aug 2025 15:47:11 +0200 Subject: [PATCH 2/8] Use an IPv4 address range suitable for documentation --- config-example.yaml | 2 +- derp-example.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config-example.yaml b/config-example.yaml index e476f9fd..6afab21b 100644 --- a/config-example.yaml +++ b/config-example.yaml @@ -105,7 +105,7 @@ derp: # For better connection stability (especially when using an 
Exit-Node and DNS is not working), # it is possible to optionally add the public IPv4 and IPv6 address to the Derp-Map using: - ipv4: 1.2.3.4 + ipv4: 198.51.100.1 ipv6: 2001:db8::1 # List of externally available DERP maps encoded in JSON diff --git a/derp-example.yaml b/derp-example.yaml index 732c4ba0..26cca492 100644 --- a/derp-example.yaml +++ b/derp-example.yaml @@ -7,9 +7,9 @@ regions: nodes: - name: 900a regionid: 900 - hostname: myderp.mydomain.no - ipv4: 123.123.123.123 - ipv6: "2604:a880:400:d1::828:b001" + hostname: myderp.example.com + ipv4: 198.51.100.1 + ipv6: 2001:db8::1 stunport: 0 stunonly: false derpport: 0 From e949859d33fc0888818088d5047673561bdf1b81 Mon Sep 17 00:00:00 2001 From: Florian Preinstorfer Date: Wed, 13 Aug 2025 22:18:55 +0200 Subject: [PATCH 3/8] Add DERP docs --- docs/about/features.md | 2 +- docs/ref/derp.md | 153 ++++++++++++++++++++++++++ docs/ref/integration/reverse-proxy.md | 2 +- docs/setup/requirements.md | 4 +- mkdocs.yml | 1 + 5 files changed, 158 insertions(+), 4 deletions(-) create mode 100644 docs/ref/derp.md diff --git a/docs/about/features.md b/docs/about/features.md index 33b32618..14d484bc 100644 --- a/docs/about/features.md +++ b/docs/about/features.md @@ -19,7 +19,7 @@ provides on overview of Headscale's feature and compatibility with the Tailscale - [x] [Exit nodes](../ref/routes.md#exit-node) - [x] Dual stack (IPv4 and IPv6) - [x] Ephemeral nodes -- [x] Embedded [DERP server](https://tailscale.com/kb/1232/derp-servers) +- [x] Embedded [DERP server](../ref/derp.md) - [x] Access control lists ([GitHub label "policy"](https://github.com/juanfont/headscale/labels/policy%20%F0%9F%93%9D)) - [x] ACL management via API - [x] Some [Autogroups](https://tailscale.com/kb/1396/targets#autogroups), currently: `autogroup:internet`, diff --git a/docs/ref/derp.md b/docs/ref/derp.md new file mode 100644 index 00000000..a0189e10 --- /dev/null +++ b/docs/ref/derp.md @@ -0,0 +1,153 @@ +# DERP + +A [DERP (Designated Encrypted Relay 
for Packets) server](https://tailscale.com/kb/1232/derp-servers) is mainly used to +relay traffic between two nodes in case a direct connection can't be established. Headscale provides an embedded DERP +server to ensure seamless connectivity between nodes. + +## Configuration + +DERP related settings are configured within the `derp` section of the [configuration file](./configuration.md). The +following sections only use a few of the available settings; check the [example configuration](./configuration.md) for +all available configuration options. + +### Enable embedded DERP + +Headscale ships with an embedded DERP server which allows you to run your own self-hosted DERP server easily. The embedded +DERP server is disabled by default and needs to be enabled. In addition, you should configure the public IPv4 and public +IPv6 address of your Headscale server for improved connection stability: + +```yaml title="config.yaml" hl_lines="3-5" +derp: + server: + enabled: true + ipv4: 198.51.100.1 + ipv6: 2001:db8::1 +``` + +Keep in mind that [additional ports are needed to run a DERP server](../setup/requirements.md#ports-in-use). Besides +relaying traffic, it also uses STUN (udp/3478) to help clients discover their public IP addresses and perform NAT +traversal. [Check DERP server connectivity](#check-derp-server-connectivity) to see if everything works. + +### Remove Tailscale's DERP servers + +Once enabled, Headscale's embedded DERP is added to the list of free-to-use [DERP +servers](https://tailscale.com/kb/1232/derp-servers) offered by Tailscale Inc. To only use Headscale's embedded DERP +server, disable the loading of the default DERP map: + +```yaml title="config.yaml" hl_lines="6" +derp: + server: + enabled: true + ipv4: 198.51.100.1 + ipv6: 2001:db8::1 + urls: [] +``` + +!!! warning "Single point of failure" + + Removing Tailscale's DERP servers means that there is now just a single DERP server available for clients. 
This is a + single point of failure and could hamper connectivity. + + [Check DERP server connectivity](#check-derp-server-connectivity) with your embedded DERP server before removing + Tailscale's DERP servers. + +### Customize DERP map + +The DERP map offered to clients can be customized with a [dedicated YAML-configuration +file](https://github.com/juanfont/headscale/blob/main/derp-example.yaml). Typical use-cases involve: + +- Running a fleet of [custom DERP servers](https://tailscale.com/kb/1118/custom-derp-servers) +- Excluding or choosing specific regions from the Tailscale's list of free-to-use [DERP + servers](https://tailscale.com/kb/1232/derp-servers) + +The following sample `derp.yaml` references two custom regions (`custom-east` with ID 900 and `custom-west` with ID 901) +with one custom DERP server in each region. Each DERP server offers DERP relay via HTTPS on tcp/443, support for captive +portal checks via HTTP on tcp/80 and STUN on udp/3478. See the definitions of +[DERPMap](https://pkg.go.dev/tailscale.com/tailcfg#DERPMap), +[DERPRegion](https://pkg.go.dev/tailscale.com/tailcfg#DERPRegion) and +[DERPNode](https://pkg.go.dev/tailscale.com/tailcfg#DERPNode) for all available options. 
+ +```yaml title="derp.yaml" +regions: + 900: + regionid: 900 + regioncode: custom-east + regionname: My region (east) + nodes: + - name: 900a + regionid: 900 + hostname: derp900a.example.com + ipv4: 198.51.100.1 + ipv6: 2001:db8::1 + canport80: true + 901: + regionid: 901 + regioncode: custom-west + regionname: My Region (west) + nodes: + - name: 901a + regionid: 901 + hostname: derp901a.example.com + ipv4: 198.51.100.2 + ipv6: 2001:db8::2 + canport80: true +``` + +Use the following configuration to only serve the two DERP servers from the above `derp.yaml`: + +```yaml title="config.yaml" hl_lines="5 6" +derp: + server: + enabled: false + urls: [] + paths: + - /etc/headscale/derp.yaml +``` + +The embedded DERP server can also be enabled and is automatically added to the custom DERP map. + + +### Verify clients + +Access to DERP servers can be restricted to nodes that are members of your Tailnet. Relay access is denied for unknown +clients. + +=== "Embedded DERP" + + Client verification is enabled by default. + + ```yaml title="config.yaml" hl_lines="3" + derp: + server: + verify_clients: true + ``` + +=== "3rd-party DERP" + + Tailscale's `derper` provides two parameters to configure client verification: + + - Use the `-verify-client-url` parameter of the `derper` and point it towards the `/verify` endpoint of your + Headscale server (e.g. `https://headscale.example.com/verify`). The DERP server will query your Headscale instance + as soon as a client connects with it to ask whether access should be allowed or denied. Access is allowed if + Headscale knows about the connecting client and denied otherwise. + - The parameter `-verify-client-url-fail-open` controls what should happen when the DERP server can't reach the + Headscale instance. By default, it will allow access if Headscale is unreachable. + +## Check DERP server connectivity + +Any Tailscale client may be used to introspect the DERP map and to check for connectivity issues with DERP servers. 
+ +- Display DERP map: `tailscale debug derp-map` +- Check connectivity with the embedded DERP[^1]: `tailscale debug derp headscale` + +Additional DERP related metrics and information are available via the [metrics and debug +endpoint](./debug.md#metrics-and-debug-endpoint). + +[^1]: + This assumes that the default region code of the [configuration file](./configuration.md) is used. + +## Limitations + +- The embedded DERP server can't be used for Tailscale's captive portal checks as it doesn't support the `/generate_204` + endpoint via HTTP on port tcp/80. +- There are no speed or throughput optimisations; the main purpose is to assist in node connectivity. diff --git a/docs/ref/integration/reverse-proxy.md b/docs/ref/integration/reverse-proxy.md index 91ee8dfc..3586171f 100644 --- a/docs/ref/integration/reverse-proxy.md +++ b/docs/ref/integration/reverse-proxy.md @@ -13,7 +13,7 @@ Running headscale behind a reverse proxy is useful when running multiple applica The reverse proxy MUST be configured to support WebSockets to communicate with Tailscale clients. -WebSockets support is also required when using the headscale embedded DERP server. In this case, you will also need to expose the UDP port used for STUN (by default, udp/3478). Please check our [config-example.yaml](https://github.com/juanfont/headscale/blob/main/config-example.yaml). +WebSockets support is also required when using the Headscale [embedded DERP server](../derp.md). In this case, you will also need to expose the UDP port used for STUN (by default, udp/3478). Please check our [config-example.yaml](https://github.com/juanfont/headscale/blob/main/config-example.yaml). ### Cloudflare diff --git a/docs/setup/requirements.md b/docs/setup/requirements.md index 1c2450a2..627e24ed 100644 --- a/docs/setup/requirements.md +++ b/docs/setup/requirements.md @@ -22,10 +22,10 @@ The ports in use vary with the intended scenario and enabled features. 
Some of t - tcp/443 - Expose publicly: yes - HTTPS, required to make Headscale available to Tailscale clients[^1] - - Required if the built-in DERP server is enabled + - Required if the [embedded DERP server](../ref/derp.md) is enabled - udp/3478 - Expose publicly: yes - - STUN, required if the built-in DERP server is enabled + - STUN, required if the [embedded DERP server](../ref/derp.md) is enabled - tcp/50443 - Expose publicly: yes - Only required if the gRPC interface is used to [remote-control Headscale](../ref/remote-cli.md). diff --git a/mkdocs.yml b/mkdocs.yml index aa76a7d2..3881cabd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -181,6 +181,7 @@ nav: - TLS: ref/tls.md - ACLs: ref/acls.md - DNS: ref/dns.md + - DERP: ref/derp.md - Remote CLI: ref/remote-cli.md - Debug: ref/debug.md - Integration: From 630bfd265ac76f31e0a88cb7f92d356d38e9dc3d Mon Sep 17 00:00:00 2001 From: Andrey Bobelev Date: Wed, 14 May 2025 15:04:31 +0300 Subject: [PATCH 4/8] chore(derp): prioritize loading DERP maps from URLs This allows users to override default entries provided via URL --- hscontrol/derp/derp.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hscontrol/derp/derp.go b/hscontrol/derp/derp.go index b3e2475d..839de9b5 100644 --- a/hscontrol/derp/derp.go +++ b/hscontrol/derp/derp.go @@ -91,8 +91,8 @@ func GetDERPMap(cfg types.DERPConfig) (*tailcfg.DERPMap, error) { derpMaps = append(derpMaps, cfg.DERPMap) } - for _, path := range cfg.Paths { - derpMap, err := loadDERPMapFromPath(path) + for _, addr := range cfg.URLs { + derpMap, err := loadDERPMapFromURL(addr) if err != nil { return nil, err } @@ -100,8 +100,8 @@ func GetDERPMap(cfg types.DERPConfig) (*tailcfg.DERPMap, error) { derpMaps = append(derpMaps, derpMap) } - for _, addr := range cfg.URLs { - derpMap, err := loadDERPMapFromURL(addr) + for _, path := range cfg.Paths { + derpMap, err := loadDERPMapFromPath(path) if err != nil { return nil, err } From d29feaef79587092b89e8efaa2221620b5c08683 Mon 
Sep 17 00:00:00 2001 From: Andrey Bobelev Date: Wed, 14 May 2025 15:24:40 +0300 Subject: [PATCH 5/8] chore(derp): allow nil regions in DERPMaps Previously, nil regions were not properly handled. This change allows users to disable regions in DERPMaps. Particularly useful to disable some official regions. --- hscontrol/derp/derp.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hscontrol/derp/derp.go b/hscontrol/derp/derp.go index 839de9b5..479bfe5c 100644 --- a/hscontrol/derp/derp.go +++ b/hscontrol/derp/derp.go @@ -82,6 +82,12 @@ func mergeDERPMaps(derpMaps []*tailcfg.DERPMap) *tailcfg.DERPMap { maps.Copy(result.Regions, derpMap.Regions) } + for id, region := range result.Regions { + if region == nil { + delete(result.Regions, id) + } + } + return &result } From a2a6d2021802d2113dcd0e3fb35b228aac4c3080 Mon Sep 17 00:00:00 2001 From: cuiweixie Date: Sat, 23 Aug 2025 23:01:08 +0800 Subject: [PATCH 6/8] Refactor to use reflect.TypeFor --- hscontrol/db/text_serialiser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hscontrol/db/text_serialiser.go b/hscontrol/db/text_serialiser.go index 524b2696..1652901f 100644 --- a/hscontrol/db/text_serialiser.go +++ b/hscontrol/db/text_serialiser.go @@ -10,7 +10,7 @@ import ( ) // Got from https://github.com/xdg-go/strum/blob/main/types.go -var textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() +var textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() func isTextUnmarshaler(rv reflect.Value) bool { return rv.Type().Implements(textUnmarshalerType) From 860a8a597f095f788ce7abd89b12cfd75e117794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dylan=20Blanqu=C3=A9?= <68660667+dblanque@users.noreply.github.com> Date: Sat, 23 Aug 2025 16:19:23 -0300 Subject: [PATCH 7/8] Update tools.md Share/Contribute Headscale Zabbix Monitoring scripts and templates. Thank you for the awesome application to everyone involved in Headscale's development! 
--- docs/ref/integration/tools.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ref/integration/tools.md b/docs/ref/integration/tools.md index f7119087..d5849ffe 100644 --- a/docs/ref/integration/tools.md +++ b/docs/ref/integration/tools.md @@ -13,3 +13,4 @@ This page collects third-party tools, client libraries, and scripts related to h | headscalebacktosqlite | [Github](https://github.com/bigbozza/headscalebacktosqlite) | Migrate headscale from PostgreSQL back to SQLite | | headscale-pf | [Github](https://github.com/YouSysAdmin/headscale-pf) | Populates user groups based on user groups in Jumpcloud or Authentik | | headscale-client-go | [Github](https://github.com/hibare/headscale-client-go) | A Go client implementation for the Headscale HTTP API. | +| headscale-zabbix | [Github](https://github.com/dblanque/headscale-zabbix) | A Zabbix Monitoring Template for the Headscale Service. | From 1a7a2f41962f74ccee7088b6613de2097046b428 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 24 Aug 2025 12:07:32 +0000 Subject: [PATCH 8/8] flake.lock: Update (#2699) --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index bc10f127..94bba45e 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1752012998, - "narHash": "sha256-Q82Ms+FQmgOBkdoSVm+FBpuFoeUAffNerR5yVV7SgT8=", + "lastModified": 1755829505, + "narHash": "sha256-4/Jd+LkQ2ssw8luQVkqVs9spDBVE6h/u/hC/tzngsPo=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "2a2130494ad647f953593c4e84ea4df839fbd68c", + "rev": "f937f8ecd1c70efd7e9f90ba13dfb400cf559de4", "type": "github" }, "original": {