node_traffic_sync_job.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. package job
  2. import (
  3. "context"
  4. "strings"
  5. "sync"
  6. "time"
  7. "github.com/mhsanaei/3x-ui/v3/internal/database/model"
  8. "github.com/mhsanaei/3x-ui/v3/internal/logger"
  9. "github.com/mhsanaei/3x-ui/v3/internal/util/common"
  10. "github.com/mhsanaei/3x-ui/v3/internal/web/runtime"
  11. "github.com/mhsanaei/3x-ui/v3/internal/web/service"
  12. "github.com/mhsanaei/3x-ui/v3/internal/web/websocket"
  13. "github.com/mhsanaei/3x-ui/v3/internal/xray"
  14. )
  // Tuning knobs for the node traffic sync job.
  const (
  	// nodeTrafficSyncConcurrency is the capacity of the semaphore that bounds
  	// how many per-node goroutines run at once.
  	nodeTrafficSyncConcurrency = 8

  	// nodeTrafficSyncRequestTimeout is the deadline for a single traffic
  	// snapshot fetch or global-traffic push against one node.
  	nodeTrafficSyncRequestTimeout time.Duration = 4 * time.Second

  	// nodeReconcileTimeout is the deadline for reconciling a node whose
  	// config is marked dirty.
  	nodeReconcileTimeout time.Duration = 30 * time.Second

  	// nodeClientIpSyncInterval is the minimum spacing between runs that also
  	// sync client IPs.
  	nodeClientIpSyncInterval time.Duration = 10 * time.Second

  	// nodeClientIpSyncTimeout is the single deadline shared by all client-IP
  	// calls made to one node within a run.
  	nodeClientIpSyncTimeout time.Duration = 6 * time.Second

  	// nodeGlobalPushInterval is the minimum spacing between pushes of the
  	// aggregated per-client usage to the nodes.
  	nodeGlobalPushInterval time.Duration = 30 * time.Second

  	// nodeInboundSpeedWindowMs is the poll window (in milliseconds) that
  	// node-inbound speed deltas are normalized to. It MUST stay equal to the
  	// dashboard's TRAFFIC_POLL_INTERVAL_S (5s): the frontend divides every
  	// delta by that fixed value to obtain a rate.
  	nodeInboundSpeedWindowMs int64 = 5000
  )
  // inboundSample records what was last observed for one node inbound: its
  // cumulative byte counters and when they last moved. Successive samples are
  // diffed to derive a normalized speed.
  type inboundSample struct {
  	up   int64 // cumulative upload counter at the last observation
  	down int64 // cumulative download counter at the last observation
  	at   int64 // unix milliseconds at which the counters last changed
  }
  32. type NodeTrafficSyncJob struct {
  33. nodeService service.NodeService
  34. inboundService service.InboundService
  35. settingService service.SettingService
  36. xrayService service.XrayService
  37. running sync.Mutex
  38. structural atomicBool
  39. ipSyncMu sync.Mutex
  40. lastIpSync int64
  41. globalPushMu sync.Mutex
  42. lastGlobalPush int64
  43. // noGuidIpEndpoint tracks nodes (by id) whose client-IP attribution endpoint
  44. // returned 404, so an old-build node is noted once instead of every cycle.
  45. noGuidIpEndpoint sync.Map
  46. // prevInboundTotals holds the previous poll's cumulative up/down (and the time
  47. // the counter last changed) per node inbound tag, so the next poll can derive
  48. // a per-inbound speed delta — node inbounds have no local Xray poll. Touched
  49. // only from Run (serialized).
  50. prevInboundTotals map[string]inboundSample
  51. }
  // atomicBool is a mutex-protected boolean flag that can be raised from any
  // goroutine and later consumed exactly once.
  type atomicBool struct {
  	mu sync.Mutex
  	v  bool
  }

  // set raises the flag.
  func (b *atomicBool) set() {
  	b.mu.Lock()
  	defer b.mu.Unlock()
  	b.v = true
  }

  // takeAndReset reports whether the flag was raised and lowers it again, both
  // under the same lock acquisition.
  func (b *atomicBool) takeAndReset() bool {
  	b.mu.Lock()
  	defer b.mu.Unlock()
  	was := b.v
  	b.v = false
  	return was
  }
  68. func NewNodeTrafficSyncJob() *NodeTrafficSyncJob {
  69. return &NodeTrafficSyncJob{}
  70. }
  71. func (j *NodeTrafficSyncJob) Run() {
  72. if !j.running.TryLock() {
  73. return
  74. }
  75. defer j.running.Unlock()
  76. mgr := runtime.GetManager()
  77. if mgr == nil {
  78. return
  79. }
  80. nodes, err := j.nodeService.GetAll()
  81. if err != nil {
  82. logger.Warning("node traffic sync: load nodes failed:", err)
  83. return
  84. }
  85. if len(nodes) == 0 {
  86. return
  87. }
  88. // Decide once per tick whether this run also syncs client IPs, and stamp the
  89. // clock before the loop so two back-to-back 5s ticks can't both qualify.
  90. doIpSync := false
  91. j.ipSyncMu.Lock()
  92. if now := time.Now().Unix(); now-j.lastIpSync >= int64(nodeClientIpSyncInterval/time.Second) {
  93. doIpSync = true
  94. j.lastIpSync = now
  95. }
  96. j.ipSyncMu.Unlock()
  97. sem := make(chan struct{}, nodeTrafficSyncConcurrency)
  98. var wg sync.WaitGroup
  99. for _, n := range nodes {
  100. if !n.Enable || n.Status != "online" {
  101. continue
  102. }
  103. wg.Add(1)
  104. sem <- struct{}{}
  105. n := n
  106. common.GoRecover("node-traffic-sync:"+n.Name, func() {
  107. defer wg.Done()
  108. defer func() { <-sem }()
  109. j.syncOne(mgr, n, doIpSync)
  110. })
  111. }
  112. wg.Wait()
  113. _, clientsDisabled, err := j.inboundService.AddTraffic(nil, nil)
  114. if err != nil {
  115. logger.Warning("node traffic sync: depletion check failed:", err)
  116. }
  117. if clientsDisabled {
  118. if restartOnDisable, settingErr := j.settingService.GetRestartXrayOnClientDisable(); settingErr == nil && restartOnDisable {
  119. if err := j.xrayService.RestartXray(true); err != nil {
  120. logger.Warning("node traffic sync: restart xray after disabling clients failed:", err)
  121. j.xrayService.SetToNeedRestart()
  122. }
  123. } else if settingErr != nil {
  124. logger.Warning("node traffic sync: get RestartXrayOnClientDisable failed:", settingErr)
  125. }
  126. j.structural.set()
  127. }
  128. j.maybePushGlobals(mgr, nodes)
  129. lastOnline, err := j.inboundService.GetClientsLastOnline()
  130. if err != nil {
  131. logger.Warning("node traffic sync: get last-online failed:", err)
  132. }
  133. if lastOnline == nil {
  134. lastOnline = map[string]int64{}
  135. }
  136. // Prune stale local-online entries (no local active emails or inbound tags
  137. // to add here — only the local xray poll feeds those) so a stopped local
  138. // xray's clients and inbounds still age out between traffic polls.
  139. j.inboundService.RefreshLocalOnlineClients(nil, nil)
  140. // Derive per-node-inbound speed every tick (keeps the baseline fresh even
  141. // with no dashboard open); only broadcast it when someone is watching.
  142. inboundSpeed := j.nodeInboundSpeed()
  143. if !websocket.HasClients() {
  144. return
  145. }
  146. online := j.inboundService.GetOnlineClients()
  147. if online == nil {
  148. online = []string{}
  149. }
  150. trafficPayload := map[string]any{
  151. "onlineClients": online,
  152. "onlineByGuid": j.inboundService.GetOnlineClientsByGuid(),
  153. "activeInbounds": j.inboundService.GetActiveInboundsByGuid(),
  154. "lastOnlineMap": lastOnline,
  155. }
  156. // Always send the key so the dashboard clears node inbounds that went idle
  157. // this tick. A nil result (query error) marshals to null and is skipped
  158. // client-side, leaving the last shown value untouched; an empty (non-nil)
  159. // slice marshals to [] and clears stale speeds.
  160. trafficPayload["nodeTraffics"] = inboundSpeed
  161. websocket.BroadcastTraffic(trafficPayload)
  162. clientStats := map[string]any{}
  163. if stats, err := j.inboundService.GetAllClientTraffics(); err != nil {
  164. logger.Warning("node traffic sync: get all client traffics for websocket failed:", err)
  165. } else if len(stats) > 0 {
  166. clientStats["clients"] = stats
  167. }
  168. if summary, err := j.inboundService.GetInboundsTrafficSummary(); err != nil {
  169. logger.Warning("node traffic sync: get inbounds summary for websocket failed:", err)
  170. } else if len(summary) > 0 {
  171. clientStats["inbounds"] = summary
  172. }
  173. if len(clientStats) > 0 {
  174. websocket.BroadcastClientStats(clientStats)
  175. }
  176. if j.structural.takeAndReset() {
  177. websocket.BroadcastInvalidate(websocket.MessageTypeInbounds)
  178. websocket.BroadcastInvalidate(websocket.MessageTypeClients)
  179. }
  180. }
  181. // nodeInboundSpeed derives a per-node-inbound speed delta by diffing the current
  182. // cumulative up/down against the previous poll's, keyed by the central tag the
  183. // dashboard matches. The node's counter keeps climbing while the master can't
  184. // reach it, so the first delta after a gap (node outage, skipped poll, slow
  185. // node) spans more than one poll window; it is normalized to the fixed
  186. // nodeInboundSpeedWindowMs using the real elapsed time so the dashboard's fixed
  187. // divisor yields the true average rate over the gap instead of an impossible
  188. // one-tick spike. The change timestamp only advances when the value actually
  189. // moves, so an idle stretch is averaged correctly when traffic resumes. A reset
  190. // rebaselines to the lower value; a first-seen tag yields no delta until the
  191. // next poll.
  192. func (j *NodeTrafficSyncJob) nodeInboundSpeed() []*xray.Traffic {
  193. totals, err := j.inboundService.GetNodeInboundTrafficTotals()
  194. if err != nil {
  195. return nil
  196. }
  197. now := time.Now().UnixMilli()
  198. deltas := make([]*xray.Traffic, 0, len(totals))
  199. next := make(map[string]inboundSample, len(totals))
  200. for tag, cur := range totals {
  201. prev, ok := j.prevInboundTotals[tag]
  202. if !ok {
  203. next[tag] = inboundSample{up: cur[0], down: cur[1], at: now}
  204. continue
  205. }
  206. dUp := cur[0] - prev.up
  207. dDown := cur[1] - prev.down
  208. if dUp <= 0 && dDown <= 0 {
  209. // No movement, or a counter reset: hold the change timestamp so a
  210. // later jump is averaged over the real elapsed window, not shown as a
  211. // spike. Adopt the lower value on a reset.
  212. if cur[0] < prev.up || cur[1] < prev.down {
  213. next[tag] = inboundSample{up: cur[0], down: cur[1], at: now}
  214. } else {
  215. next[tag] = prev
  216. }
  217. continue
  218. }
  219. if dUp < 0 {
  220. dUp = 0
  221. }
  222. if dDown < 0 {
  223. dDown = 0
  224. }
  225. elapsed := now - prev.at
  226. if elapsed < nodeInboundSpeedWindowMs {
  227. elapsed = nodeInboundSpeedWindowMs
  228. }
  229. up := dUp * nodeInboundSpeedWindowMs / elapsed
  230. down := dDown * nodeInboundSpeedWindowMs / elapsed
  231. if up > 0 || down > 0 {
  232. deltas = append(deltas, &xray.Traffic{Tag: tag, IsInbound: true, Up: up, Down: down})
  233. }
  234. next[tag] = inboundSample{up: cur[0], down: cur[1], at: now}
  235. }
  236. j.prevInboundTotals = next
  237. return deltas
  238. }
  239. // maybePushGlobals broadcasts this panel's aggregated per-client usage to its
  240. // online nodes so each node can display the client's cross-panel total and
  241. // enforce its quota locally (see InboundService.AcceptGlobalTraffic). Scoped
  242. // per node to the clients that node actually hosts, and throttled — the
  243. // aggregates only need to reach nodes on a human timescale, not every poll.
  244. func (j *NodeTrafficSyncJob) maybePushGlobals(mgr *runtime.Manager, nodes []*model.Node) {
  245. j.globalPushMu.Lock()
  246. now := time.Now().Unix()
  247. if now-j.lastGlobalPush < int64(nodeGlobalPushInterval/time.Second) {
  248. j.globalPushMu.Unlock()
  249. return
  250. }
  251. j.lastGlobalPush = now
  252. j.globalPushMu.Unlock()
  253. masterGuid, err := j.settingService.GetPanelGuid()
  254. if err != nil || masterGuid == "" {
  255. return
  256. }
  257. sem := make(chan struct{}, nodeTrafficSyncConcurrency)
  258. var wg sync.WaitGroup
  259. for _, n := range nodes {
  260. if !n.Enable || n.Status != "online" {
  261. continue
  262. }
  263. remote, err := mgr.RemoteFor(n)
  264. if err != nil {
  265. continue
  266. }
  267. traffics, err := j.inboundService.GetNodeClientTraffics(n.Id)
  268. if err != nil {
  269. logger.Warningf("node traffic sync: load globals for %s failed: %v", n.Name, err)
  270. continue
  271. }
  272. if len(traffics) == 0 {
  273. continue
  274. }
  275. wg.Add(1)
  276. sem <- struct{}{}
  277. n, remote, traffics := n, remote, traffics
  278. common.GoRecover("node-global-push:"+n.Name, func() {
  279. defer wg.Done()
  280. defer func() { <-sem }()
  281. ctx, cancel := context.WithTimeout(context.Background(), nodeTrafficSyncRequestTimeout)
  282. defer cancel()
  283. if err := remote.PushGlobalClientTraffics(ctx, masterGuid, traffics); err != nil {
  284. // An old-build node without the endpoint answers 404 — not worth a
  285. // warning every cycle.
  286. if strings.Contains(err.Error(), "HTTP 404") {
  287. logger.Debugf("node traffic sync: node %s has no global-traffic endpoint (old build)", n.Name)
  288. } else {
  289. logger.Warningf("node traffic sync: push globals to %s failed: %v", n.Name, err)
  290. }
  291. }
  292. })
  293. }
  294. wg.Wait()
  295. }
  296. func (j *NodeTrafficSyncJob) syncOne(mgr *runtime.Manager, n *model.Node, doIpSync bool) {
  297. rt, err := mgr.RemoteFor(n)
  298. if err != nil {
  299. logger.Warningf("node traffic sync: remote lookup failed for %s: %v", n.Name, err)
  300. return
  301. }
  302. if n.ConfigDirty {
  303. reconcileCtx, reconcileCancel := context.WithTimeout(context.Background(), nodeReconcileTimeout)
  304. reconcileErr := j.inboundService.ReconcileNode(reconcileCtx, rt, n)
  305. reconcileCancel()
  306. if reconcileErr != nil {
  307. logger.Warningf("node traffic sync: reconcile for %s failed: %v", n.Name, reconcileErr)
  308. return
  309. }
  310. if clearErr := j.nodeService.ClearNodeDirty(n.Id, n.ConfigDirtyAt); clearErr != nil {
  311. logger.Warningf("node traffic sync: clear dirty for %s failed: %v", n.Name, clearErr)
  312. }
  313. j.structural.set()
  314. }
  315. ctx, cancel := context.WithTimeout(context.Background(), nodeTrafficSyncRequestTimeout)
  316. defer cancel()
  317. snap, err := rt.FetchTrafficSnapshot(ctx)
  318. if err != nil {
  319. logger.Warningf("node traffic sync: fetch from %s failed: %v", n.Name, err)
  320. j.inboundService.ClearNodeOnlineClients(n.Id)
  321. return
  322. }
  323. service.FilterNodeSnapshot(n, snap)
  324. _, _, dirty, _, _ := j.nodeService.NodeSyncState(n.Id)
  325. changed, err := j.inboundService.SetRemoteTraffic(n.Id, snap, dirty)
  326. if err != nil {
  327. logger.Warningf("node traffic sync: merge for %s failed: %v", n.Name, err)
  328. return
  329. }
  330. if changed {
  331. j.structural.set()
  332. }
  333. if !doIpSync {
  334. return
  335. }
  336. ipCtx, ipCancel := context.WithTimeout(context.Background(), nodeClientIpSyncTimeout)
  337. defer ipCancel()
  338. nodeIps, err := rt.FetchAllClientIps(ipCtx)
  339. if err == nil && len(nodeIps) > 0 {
  340. if err := j.inboundService.MergeInboundClientIps(nodeIps); err != nil {
  341. logger.Warningf("node traffic sync: merge client ips from %s failed: %v", n.Name, err)
  342. }
  343. } else if err != nil {
  344. logger.Warningf("node traffic sync: fetch client ips from %s failed: %v", n.Name, err)
  345. }
  346. masterIps, err := j.inboundService.GetAllInboundClientIps()
  347. if err != nil {
  348. logger.Warningf("node traffic sync: load client ips for push to %s failed: %v", n.Name, err)
  349. return
  350. }
  351. if len(masterIps) > 0 {
  352. if err := rt.PushAllClientIps(ipCtx, masterIps); err != nil {
  353. logger.Warningf("node traffic sync: push client ips to %s failed: %v", n.Name, err)
  354. }
  355. }
  356. // Per-node IP attribution: pull the node's guid-keyed subtree (its own
  357. // observations plus any descendants) so the master can tell which node each
  358. // IP is on. Old nodes without the endpoint return HTTP 404 every cycle — note
  359. // it once per node (re-armed on recovery) instead of flooding the log.
  360. if guidTrees, err := rt.FetchClientIpsByGuid(ipCtx); err != nil {
  361. if strings.Contains(err.Error(), "HTTP 404") {
  362. if _, seen := j.noGuidIpEndpoint.LoadOrStore(n.Id, true); !seen {
  363. logger.Debugf("node traffic sync: node %s has no client-IP attribution endpoint (old build)", n.Name)
  364. }
  365. } else {
  366. logger.Debugf("node traffic sync: fetch client ip attribution from %s failed: %v", n.Name, err)
  367. }
  368. } else {
  369. j.noGuidIpEndpoint.Delete(n.Id)
  370. if len(guidTrees) > 0 {
  371. if err := j.inboundService.MergeClientIpsByGuid(n, guidTrees); err != nil {
  372. logger.Warningf("node traffic sync: merge client ip attribution from %s failed: %v", n.Name, err)
  373. }
  374. }
  375. }
  376. }