1
0
mirror of https://github.com/juanfont/headscale.git synced 2025-09-25 17:51:11 +02:00

derp: add retry, do not replace if fails

This commit makes updating the DERP map from file and URL
more robust by retrying with exponential backoff if it fails
and, upon failure, keeping the old DERP map if we cannot successfully
build a new one.

Fixes #2694

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2025-08-21 12:17:43 +02:00
parent 1c02b0d45a
commit cce63a27d6
No known key found for this signature in database
5 changed files with 57 additions and 62 deletions

View File

@ -17,6 +17,7 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/cenkalti/backoff/v5"
"github.com/davecgh/go-spew/spew" "github.com/davecgh/go-spew/spew"
"github.com/gorilla/mux" "github.com/gorilla/mux"
grpcRuntime "github.com/grpc-ecosystem/grpc-gateway/v2/runtime" grpcRuntime "github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
@ -284,12 +285,24 @@ func (h *Headscale) scheduledTasks(ctx context.Context) {
case <-derpTickerChan: case <-derpTickerChan:
log.Info().Msg("Fetching DERPMap updates") log.Info().Msg("Fetching DERPMap updates")
derpMap := derp.GetDERPMap(h.cfg.DERP) derpMap, err := backoff.Retry(ctx, func() (*tailcfg.DERPMap, error) {
derpMap, err := derp.GetDERPMap(h.cfg.DERP)
if err != nil {
return nil, err
}
if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion { if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion {
region, _ := h.DERPServer.GenerateRegion() region, _ := h.DERPServer.GenerateRegion()
derpMap.Regions[region.RegionID] = &region derpMap.Regions[region.RegionID] = &region
} }
return derpMap, nil
}, backoff.WithBackOff(backoff.NewExponentialBackOff()))
if err != nil {
log.Error().Err(err).Msg("failed to build new DERPMap, retrying later")
continue
}
h.state.SetDERPMap(derpMap)
h.Change(change.DERPSet) h.Change(change.DERPSet)
case records, ok := <-extraRecordsUpdate: case records, ok := <-extraRecordsUpdate:
@ -516,29 +529,31 @@ func (h *Headscale) Serve() error {
h.mapBatcher.Start() h.mapBatcher.Start()
defer h.mapBatcher.Close() defer h.mapBatcher.Close()
// TODO(kradalby): fix state part.
if h.cfg.DERP.ServerEnabled { if h.cfg.DERP.ServerEnabled {
// When embedded DERP is enabled we always need a STUN server // When embedded DERP is enabled we always need a STUN server
if h.cfg.DERP.STUNAddr == "" { if h.cfg.DERP.STUNAddr == "" {
return errSTUNAddressNotSet return errSTUNAddressNotSet
} }
region, err := h.DERPServer.GenerateRegion()
if err != nil {
return fmt.Errorf("generating DERP region for embedded server: %w", err)
}
if h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion {
h.state.DERPMap().Regions[region.RegionID] = &region
}
go h.DERPServer.ServeSTUN() go h.DERPServer.ServeSTUN()
} }
if len(h.state.DERPMap().Regions) == 0 { derpMap, err := derp.GetDERPMap(h.cfg.DERP)
if err != nil {
return fmt.Errorf("failed to get DERPMap: %w", err)
}
if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion {
region, _ := h.DERPServer.GenerateRegion()
derpMap.Regions[region.RegionID] = &region
}
if len(derpMap.Regions) == 0 {
return errEmptyInitialDERPMap return errEmptyInitialDERPMap
} }
h.state.SetDERPMap(derpMap)
// Start ephemeral node garbage collector and schedule all nodes // Start ephemeral node garbage collector and schedule all nodes
// that are already in the database and ephemeral. If they are still // that are already in the database and ephemeral. If they are still
// around between restarts, they will reconnect and the GC will // around between restarts, they will reconnect and the GC will

View File

@ -10,7 +10,7 @@ import (
"os" "os"
"github.com/juanfont/headscale/hscontrol/types" "github.com/juanfont/headscale/hscontrol/types"
"github.com/rs/zerolog/log" "github.com/spf13/viper"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
) )
@ -79,26 +79,16 @@ func mergeDERPMaps(derpMaps []*tailcfg.DERPMap) *tailcfg.DERPMap {
return &result return &result
} }
func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap { func GetDERPMap(cfg types.DERPConfig) (*tailcfg.DERPMap, error) {
var derpMaps []*tailcfg.DERPMap var derpMaps []*tailcfg.DERPMap
if cfg.DERPMap != nil { if cfg.DERPMap != nil {
derpMaps = append(derpMaps, cfg.DERPMap) derpMaps = append(derpMaps, cfg.DERPMap)
} }
for _, path := range cfg.Paths { for _, path := range cfg.Paths {
log.Debug().
Str("func", "GetDERPMap").
Str("path", path).
Msg("Loading DERPMap from path")
derpMap, err := loadDERPMapFromPath(path) derpMap, err := loadDERPMapFromPath(path)
if err != nil { if err != nil {
log.Error(). return nil, err
Str("func", "GetDERPMap").
Str("path", path).
Err(err).
Msg("Could not load DERP map from path")
break
} }
derpMaps = append(derpMaps, derpMap) derpMaps = append(derpMaps, derpMap)
@ -106,18 +96,8 @@ func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap {
for _, addr := range cfg.URLs { for _, addr := range cfg.URLs {
derpMap, err := loadDERPMapFromURL(addr) derpMap, err := loadDERPMapFromURL(addr)
log.Debug().
Str("func", "GetDERPMap").
Str("url", addr.String()).
Msg("Loading DERPMap from path")
if err != nil { if err != nil {
log.Error(). return nil, err
Str("func", "GetDERPMap").
Str("url", addr.String()).
Err(err).
Msg("Could not load DERP map from path")
break
} }
derpMaps = append(derpMaps, derpMap) derpMaps = append(derpMaps, derpMap)
@ -125,7 +105,5 @@ func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap {
derpMap := mergeDERPMaps(derpMaps) derpMap := mergeDERPMaps(derpMaps)
log.Trace().Interface("derpMap", derpMap).Msg("DERPMap loaded") return derpMap, nil
return derpMap
} }

View File

@ -276,7 +276,7 @@ func DERPProbeHandler(
// An example implementation is found here https://derp.tailscale.com/bootstrap-dns // An example implementation is found here https://derp.tailscale.com/bootstrap-dns
// Coordination server is included automatically, since local DERP is using the same DNS Name in d.serverURL. // Coordination server is included automatically, since local DERP is using the same DNS Name in d.serverURL.
func DERPBootstrapDNSHandler( func DERPBootstrapDNSHandler(
derpMap *tailcfg.DERPMap, derpMap tailcfg.DERPMapView,
) func(http.ResponseWriter, *http.Request) { ) func(http.ResponseWriter, *http.Request) {
return func( return func(
writer http.ResponseWriter, writer http.ResponseWriter,
@ -287,18 +287,18 @@ func DERPBootstrapDNSHandler(
resolvCtx, cancel := context.WithTimeout(req.Context(), time.Minute) resolvCtx, cancel := context.WithTimeout(req.Context(), time.Minute)
defer cancel() defer cancel()
var resolver net.Resolver var resolver net.Resolver
for _, region := range derpMap.Regions { for _, region := range derpMap.Regions().All() {
for _, node := range region.Nodes { // we don't care if we override some nodes for _, node := range region.Nodes().All() { // we don't care if we override some nodes
addrs, err := resolver.LookupIP(resolvCtx, "ip", node.HostName) addrs, err := resolver.LookupIP(resolvCtx, "ip", node.HostName())
if err != nil { if err != nil {
log.Trace(). log.Trace().
Caller(). Caller().
Err(err). Err(err).
Msgf("bootstrap DNS lookup failed %q", node.HostName) Msgf("bootstrap DNS lookup failed %q", node.HostName())
continue continue
} }
dnsEntries[node.HostName] = addrs dnsEntries[node.HostName()] = addrs
} }
} }
writer.Header().Set("Content-Type", "application/json") writer.Header().Set("Content-Type", "application/json")

View File

@ -79,7 +79,7 @@ func (b *MapResponseBuilder) WithSelfNode() *MapResponseBuilder {
// WithDERPMap adds the DERP map to the response // WithDERPMap adds the DERP map to the response
func (b *MapResponseBuilder) WithDERPMap() *MapResponseBuilder { func (b *MapResponseBuilder) WithDERPMap() *MapResponseBuilder {
b.resp.DERPMap = b.mapper.state.DERPMap() b.resp.DERPMap = b.mapper.state.DERPMap().AsStruct()
return b return b
} }

View File

@ -9,10 +9,10 @@ import (
"io" "io"
"net/netip" "net/netip"
"os" "os"
"sync/atomic"
"time" "time"
hsdb "github.com/juanfont/headscale/hscontrol/db" hsdb "github.com/juanfont/headscale/hscontrol/db"
"github.com/juanfont/headscale/hscontrol/derp"
"github.com/juanfont/headscale/hscontrol/policy" "github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/policy/matcher" "github.com/juanfont/headscale/hscontrol/policy/matcher"
"github.com/juanfont/headscale/hscontrol/routes" "github.com/juanfont/headscale/hscontrol/routes"
@ -55,7 +55,7 @@ type State struct {
// ipAlloc manages IP address allocation for nodes // ipAlloc manages IP address allocation for nodes
ipAlloc *hsdb.IPAllocator ipAlloc *hsdb.IPAllocator
// derpMap contains the current DERP relay configuration // derpMap contains the current DERP relay configuration
derpMap *tailcfg.DERPMap derpMap atomic.Pointer[tailcfg.DERPMap]
// polMan handles policy evaluation and management // polMan handles policy evaluation and management
polMan policy.PolicyManager polMan policy.PolicyManager
// registrationCache caches node registration data to reduce database load // registrationCache caches node registration data to reduce database load
@ -86,8 +86,6 @@ func NewState(cfg *types.Config) (*State, error) {
return nil, fmt.Errorf("init ip allocatior: %w", err) return nil, fmt.Errorf("init ip allocatior: %w", err)
} }
derpMap := derp.GetDERPMap(cfg.DERP)
nodes, err := db.ListNodes() nodes, err := db.ListNodes()
if err != nil { if err != nil {
return nil, fmt.Errorf("loading nodes: %w", err) return nil, fmt.Errorf("loading nodes: %w", err)
@ -107,17 +105,17 @@ func NewState(cfg *types.Config) (*State, error) {
return nil, fmt.Errorf("init policy manager: %w", err) return nil, fmt.Errorf("init policy manager: %w", err)
} }
return &State{ s := &State{
cfg: cfg, cfg: cfg,
db: db, db: db,
ipAlloc: ipAlloc, ipAlloc: ipAlloc,
// TODO(kradalby): Update DERPMap
derpMap: derpMap,
polMan: polMan, polMan: polMan,
registrationCache: registrationCache, registrationCache: registrationCache,
primaryRoutes: routes.New(), primaryRoutes: routes.New(),
}, nil }
return s, nil
} }
// Close gracefully shuts down the State instance and releases all resources. // Close gracefully shuts down the State instance and releases all resources.
@ -170,9 +168,14 @@ func policyBytes(db *hsdb.HSDatabase, cfg *types.Config) ([]byte, error) {
return nil, fmt.Errorf("%w: %s", ErrUnsupportedPolicyMode, cfg.Policy.Mode) return nil, fmt.Errorf("%w: %s", ErrUnsupportedPolicyMode, cfg.Policy.Mode)
} }
// SetDERPMap updates the DERP relay configuration.
func (s *State) SetDERPMap(dm *tailcfg.DERPMap) {
s.derpMap.Store(dm)
}
// DERPMap returns the current DERP relay configuration for peer-to-peer connectivity. // DERPMap returns the current DERP relay configuration for peer-to-peer connectivity.
func (s *State) DERPMap() *tailcfg.DERPMap { func (s *State) DERPMap() tailcfg.DERPMapView {
return s.derpMap return s.derpMap.Load().View()
} }
// ReloadPolicy reloads the access control policy and triggers auto-approval if changed. // ReloadPolicy reloads the access control policy and triggers auto-approval if changed.
@ -209,7 +212,6 @@ func (s *State) CreateUser(user types.User) (*types.User, bool, error) {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
if err := s.db.DB.Save(&user).Error; err != nil { if err := s.db.DB.Save(&user).Error; err != nil {
return nil, false, fmt.Errorf("creating user: %w", err) return nil, false, fmt.Errorf("creating user: %w", err)
} }