Răsfoiți Sursa

fix(traffic-writer): replace sync.Once with Start/Stop cycle so SIGHUP restart works

After a SIGHUP-driven panel restart (which is exactly what the frontend
triggers after a successful DB import via /panel/setting/restartPanel),
the previous implementation deadlocked:

1. server.Stop() called StopTrafficWriter — cancels the context and waits
   for the consumer goroutine to exit. The goroutine dies.
2. server.Start() called StartTrafficWriter, but sync.Once had already
   fired, so it was a no-op. twQueue still pointed to the old channel
   with no consumer.
3. startTask() → RestartXray(true) → GetXrayConfig() →
   InboundService.AddTraffic(nil, nil) → submitTrafficWrite. The send
   to twQueue succeeded (buffer space) but <-req.done blocked forever
   because no goroutine was draining the channel.
4. RestartXray held the global xray lock for the entire hang, so every
   subsequent restart attempt from the panel UI also blocked on
   lock.Lock(). User-visible symptom: xray stopped silently after DB
   import and no panel action could revive it.

Replace sync.Once with a mutex-guarded Start that spawns a fresh
goroutine on each cycle, and a Stop that resets the package state so
the next Start works. runTrafficWriter now takes its channels as
parameters instead of reading package vars, so the old goroutine can't
interfere with a new one if their lifetimes briefly overlap.
MHSanaei 13 ore în urmă
părinte
comite
8f3202f431
1 a modificat fișierele cu 47 adăugiri și 18 ștergeri
  1. 47 18
      web/service/traffic_writer.go

+ 47 - 18
web/service/traffic_writer.go

@@ -21,39 +21,64 @@ type trafficWriteRequest struct {
 }
 
 var (
+	twMu     sync.Mutex
 	twQueue  chan *trafficWriteRequest
-	twCtx    context.Context
 	twCancel context.CancelFunc
 	twDone   chan struct{}
-	twOnce   sync.Once
 )
 
+// StartTrafficWriter spins up the serial writer goroutine. Safe to call again
+// after StopTrafficWriter — each Start/Stop cycle gets fresh channels. The
+// previous sync.Once-based implementation deadlocked after a SIGHUP-driven
+// panel restart: Stop killed the consumer goroutine but Once prevented Start
+// from spawning a new one, so every later submitTrafficWrite blocked forever
+// on <-req.done with no consumer (including the AddTraffic call inside
+// XrayService.GetXrayConfig that runs from startTask).
 func StartTrafficWriter() {
-	twOnce.Do(func() {
-		twQueue = make(chan *trafficWriteRequest, trafficWriterQueueSize)
-		twCtx, twCancel = context.WithCancel(context.Background())
-		twDone = make(chan struct{})
-		go runTrafficWriter()
-	})
+	twMu.Lock()
+	defer twMu.Unlock()
+	if twQueue != nil {
+		return
+	}
+	queue := make(chan *trafficWriteRequest, trafficWriterQueueSize)
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan struct{})
+	twQueue = queue
+	twCancel = cancel
+	twDone = done
+	go runTrafficWriter(queue, ctx, done)
 }
 
+// StopTrafficWriter cancels the writer context and waits for the goroutine to
+// drain any pending requests before returning. Resets the package state so a
+// subsequent StartTrafficWriter can spawn a fresh consumer.
 func StopTrafficWriter() {
-	if twCancel != nil {
-		twCancel()
-		<-twDone
+	twMu.Lock()
+	cancel := twCancel
+	done := twDone
+	twQueue = nil
+	twCancel = nil
+	twDone = nil
+	twMu.Unlock()
+
+	if cancel != nil {
+		cancel()
+	}
+	if done != nil {
+		<-done
 	}
 }
 
-func runTrafficWriter() {
-	defer close(twDone)
+func runTrafficWriter(queue chan *trafficWriteRequest, ctx context.Context, done chan struct{}) {
+	defer close(done)
 	for {
 		select {
-		case req := <-twQueue:
+		case req := <-queue:
 			req.done <- safeApply(req.apply)
-		case <-twCtx.Done():
+		case <-ctx.Done():
 			for {
 				select {
-				case req := <-twQueue:
+				case req := <-queue:
 					req.done <- safeApply(req.apply)
 				default:
 					return
@@ -74,12 +99,16 @@ func safeApply(fn func() error) (err error) {
 }
 
 func submitTrafficWrite(fn func() error) error {
-	if twQueue == nil {
+	twMu.Lock()
+	queue := twQueue
+	twMu.Unlock()
+
+	if queue == nil {
 		return safeApply(fn)
 	}
 	req := &trafficWriteRequest{apply: fn, done: make(chan error, 1)}
 	select {
-	case twQueue <- req:
+	case queue <- req:
 	case <-time.After(trafficWriterSubmitTimeout):
 		return errors.New("traffic writer queue full")
 	}