1
0
mirror of https://github.com/juanfont/headscale.git synced 2025-09-25 17:51:11 +02:00

derp: add retry, do not replace if fails

This commit makes updating the DERP map from file and URL
more robust by retrying with exponential backoff if it fails
and, upon failure, keeping the old DERP map if we cannot
successfully build a new one.

Fixes #2694

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2025-08-21 12:17:43 +02:00
parent 1c02b0d45a
commit cce63a27d6
No known key found for this signature in database
5 changed files with 57 additions and 62 deletions

View File

@ -17,6 +17,7 @@ import (
"syscall"
"time"
"github.com/cenkalti/backoff/v5"
"github.com/davecgh/go-spew/spew"
"github.com/gorilla/mux"
grpcRuntime "github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
@ -284,12 +285,24 @@ func (h *Headscale) scheduledTasks(ctx context.Context) {
case <-derpTickerChan:
log.Info().Msg("Fetching DERPMap updates")
derpMap := derp.GetDERPMap(h.cfg.DERP)
derpMap, err := backoff.Retry(ctx, func() (*tailcfg.DERPMap, error) {
derpMap, err := derp.GetDERPMap(h.cfg.DERP)
if err != nil {
return nil, err
}
if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion {
region, _ := h.DERPServer.GenerateRegion()
derpMap.Regions[region.RegionID] = &region
}
return derpMap, nil
}, backoff.WithBackOff(backoff.NewExponentialBackOff()))
if err != nil {
log.Error().Err(err).Msg("failed to build new DERPMap, retrying later")
continue
}
h.state.SetDERPMap(derpMap)
h.Change(change.DERPSet)
case records, ok := <-extraRecordsUpdate:
@ -516,29 +529,31 @@ func (h *Headscale) Serve() error {
h.mapBatcher.Start()
defer h.mapBatcher.Close()
// TODO(kradalby): fix state part.
if h.cfg.DERP.ServerEnabled {
// When embedded DERP is enabled we always need a STUN server
if h.cfg.DERP.STUNAddr == "" {
return errSTUNAddressNotSet
}
region, err := h.DERPServer.GenerateRegion()
if err != nil {
return fmt.Errorf("generating DERP region for embedded server: %w", err)
}
if h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion {
h.state.DERPMap().Regions[region.RegionID] = &region
}
go h.DERPServer.ServeSTUN()
}
if len(h.state.DERPMap().Regions) == 0 {
derpMap, err := derp.GetDERPMap(h.cfg.DERP)
if err != nil {
return fmt.Errorf("failed to get DERPMap: %w", err)
}
if h.cfg.DERP.ServerEnabled && h.cfg.DERP.AutomaticallyAddEmbeddedDerpRegion {
region, _ := h.DERPServer.GenerateRegion()
derpMap.Regions[region.RegionID] = &region
}
if len(derpMap.Regions) == 0 {
return errEmptyInitialDERPMap
}
h.state.SetDERPMap(derpMap)
// Start ephemeral node garbage collector and schedule all nodes
// that are already in the database and ephemeral. If they are still
// around between restarts, they will reconnect and the GC will

View File

@ -10,7 +10,7 @@ import (
"os"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"gopkg.in/yaml.v3"
"tailscale.com/tailcfg"
)
@ -79,26 +79,16 @@ func mergeDERPMaps(derpMaps []*tailcfg.DERPMap) *tailcfg.DERPMap {
return &result
}
func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap {
func GetDERPMap(cfg types.DERPConfig) (*tailcfg.DERPMap, error) {
var derpMaps []*tailcfg.DERPMap
if cfg.DERPMap != nil {
derpMaps = append(derpMaps, cfg.DERPMap)
}
for _, path := range cfg.Paths {
log.Debug().
Str("func", "GetDERPMap").
Str("path", path).
Msg("Loading DERPMap from path")
derpMap, err := loadDERPMapFromPath(path)
if err != nil {
log.Error().
Str("func", "GetDERPMap").
Str("path", path).
Err(err).
Msg("Could not load DERP map from path")
break
return nil, err
}
derpMaps = append(derpMaps, derpMap)
@ -106,18 +96,8 @@ func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap {
for _, addr := range cfg.URLs {
derpMap, err := loadDERPMapFromURL(addr)
log.Debug().
Str("func", "GetDERPMap").
Str("url", addr.String()).
Msg("Loading DERPMap from path")
if err != nil {
log.Error().
Str("func", "GetDERPMap").
Str("url", addr.String()).
Err(err).
Msg("Could not load DERP map from path")
break
return nil, err
}
derpMaps = append(derpMaps, derpMap)
@ -125,7 +105,5 @@ func GetDERPMap(cfg types.DERPConfig) *tailcfg.DERPMap {
derpMap := mergeDERPMaps(derpMaps)
log.Trace().Interface("derpMap", derpMap).Msg("DERPMap loaded")
return derpMap
return derpMap, nil
}

View File

@ -276,7 +276,7 @@ func DERPProbeHandler(
// An example implementation is found here https://derp.tailscale.com/bootstrap-dns
// Coordination server is included automatically, since local DERP is using the same DNS Name in d.serverURL.
func DERPBootstrapDNSHandler(
derpMap *tailcfg.DERPMap,
derpMap tailcfg.DERPMapView,
) func(http.ResponseWriter, *http.Request) {
return func(
writer http.ResponseWriter,
@ -287,18 +287,18 @@ func DERPBootstrapDNSHandler(
resolvCtx, cancel := context.WithTimeout(req.Context(), time.Minute)
defer cancel()
var resolver net.Resolver
for _, region := range derpMap.Regions {
for _, node := range region.Nodes { // we don't care if we override some nodes
addrs, err := resolver.LookupIP(resolvCtx, "ip", node.HostName)
for _, region := range derpMap.Regions().All() {
for _, node := range region.Nodes().All() { // we don't care if we override some nodes
addrs, err := resolver.LookupIP(resolvCtx, "ip", node.HostName())
if err != nil {
log.Trace().
Caller().
Err(err).
Msgf("bootstrap DNS lookup failed %q", node.HostName)
Msgf("bootstrap DNS lookup failed %q", node.HostName())
continue
}
dnsEntries[node.HostName] = addrs
dnsEntries[node.HostName()] = addrs
}
}
writer.Header().Set("Content-Type", "application/json")

View File

@ -79,7 +79,7 @@ func (b *MapResponseBuilder) WithSelfNode() *MapResponseBuilder {
// WithDERPMap adds the DERP map to the response
func (b *MapResponseBuilder) WithDERPMap() *MapResponseBuilder {
b.resp.DERPMap = b.mapper.state.DERPMap()
b.resp.DERPMap = b.mapper.state.DERPMap().AsStruct()
return b
}

View File

@ -9,10 +9,10 @@ import (
"io"
"net/netip"
"os"
"sync/atomic"
"time"
hsdb "github.com/juanfont/headscale/hscontrol/db"
"github.com/juanfont/headscale/hscontrol/derp"
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/policy/matcher"
"github.com/juanfont/headscale/hscontrol/routes"
@ -55,7 +55,7 @@ type State struct {
// ipAlloc manages IP address allocation for nodes
ipAlloc *hsdb.IPAllocator
// derpMap contains the current DERP relay configuration
derpMap *tailcfg.DERPMap
derpMap atomic.Pointer[tailcfg.DERPMap]
// polMan handles policy evaluation and management
polMan policy.PolicyManager
// registrationCache caches node registration data to reduce database load
@ -86,8 +86,6 @@ func NewState(cfg *types.Config) (*State, error) {
return nil, fmt.Errorf("init ip allocatior: %w", err)
}
derpMap := derp.GetDERPMap(cfg.DERP)
nodes, err := db.ListNodes()
if err != nil {
return nil, fmt.Errorf("loading nodes: %w", err)
@ -107,17 +105,17 @@ func NewState(cfg *types.Config) (*State, error) {
return nil, fmt.Errorf("init policy manager: %w", err)
}
return &State{
s := &State{
cfg: cfg,
db: db,
ipAlloc: ipAlloc,
// TODO(kradalby): Update DERPMap
derpMap: derpMap,
polMan: polMan,
registrationCache: registrationCache,
primaryRoutes: routes.New(),
}, nil
}
return s, nil
}
// Close gracefully shuts down the State instance and releases all resources.
@ -170,9 +168,14 @@ func policyBytes(db *hsdb.HSDatabase, cfg *types.Config) ([]byte, error) {
return nil, fmt.Errorf("%w: %s", ErrUnsupportedPolicyMode, cfg.Policy.Mode)
}
// SetDERPMap updates the DERP relay configuration.
func (s *State) SetDERPMap(dm *tailcfg.DERPMap) {
s.derpMap.Store(dm)
}
// DERPMap returns the current DERP relay configuration for peer-to-peer connectivity.
func (s *State) DERPMap() *tailcfg.DERPMap {
return s.derpMap
func (s *State) DERPMap() tailcfg.DERPMapView {
return s.derpMap.Load().View()
}
// ReloadPolicy reloads the access control policy and triggers auto-approval if changed.
@ -209,7 +212,6 @@ func (s *State) CreateUser(user types.User) (*types.User, bool, error) {
s.mu.Lock()
defer s.mu.Unlock()
if err := s.db.DB.Save(&user).Error; err != nil {
return nil, false, fmt.Errorf("creating user: %w", err)
}