소스 검색

fix(nodes): extend duplicate-GUID hardening to master collisions, IP attribution, and a heartbeat warning

Builds on the node-vs-node fix: a node's GUID is now also treated as ambiguous when it equals the master's own panelGuid (a node cloned from the master), so the master's local clients and that node can't merge. Centralized as ambiguousNodeGuids(nodes, selfGuid) + effectiveNodeKey(node).

Applied the same node-unique fallback to the GUID-keyed IP attribution that #4983 added but the prior commit left collapsing: MergeClientIpsByGuid remaps a cloned node's own subtree to its node-unique key, nodeGuidNameMap resolves names by that key, and node deletion purges both keys. Added a throttled heartbeat warning so the operator is told to regenerate a duplicate panelGuid. Tests cover master-collision, effectiveNodeKey, and the IP remap.
MHSanaei 8 시간 전
부모
커밋
98c9ba1f91

+ 1 - 1
internal/web/job/node_traffic_sync_job.go

@@ -307,7 +307,7 @@ func (j *NodeTrafficSyncJob) syncOne(mgr *runtime.Manager, n *model.Node, doIpSy
 	if guidTrees, err := rt.FetchClientIpsByGuid(ipCtx); err != nil {
 		logger.Debugf("node traffic sync: fetch client ip attribution from %s failed: %v", n.Name, err)
 	} else if len(guidTrees) > 0 {
-		if err := j.inboundService.MergeClientIpsByGuid(guidTrees); err != nil {
+		if err := j.inboundService.MergeClientIpsByGuid(n, guidTrees); err != nil {
 			logger.Warningf("node traffic sync: merge client ip attribution from %s failed: %v", n.Name, err)
 		}
 	}

+ 7 - 15
internal/web/service/inbound_node.go

@@ -211,23 +211,15 @@ func (s *InboundService) setRemoteTrafficLocked(nodeID int, snap *runtime.Traffi
 	// originGuidFor attributes a synced inbound to the panel that physically
 	// hosts it: inbounds the node forwards from its own sub-nodes already carry
 	// a non-empty OriginNodeGuid (kept as-is across hops); the node's own local
-	// inbounds report empty, so they are attributed to the node's own key. That
-	// key is the node's panelGuid, unless another of this master's nodes reports
-	// the same GUID (cloned server — the panelGuid ships in the copied settings),
-	// in which case it would collapse both nodes into one #4983 bucket, so fall
-	// back to the node-unique id. Old-build nodes with no GUID use the id too.
+	// inbounds report empty, so they are attributed to the node's own key
+	// effectiveNodeKey, which is the node's panelGuid unless that GUID is
+	// ambiguous (shared with another node or the master, i.e. a cloned server),
+	// in which case it falls back to the node-unique id so #4983 attribution
+	// doesn't collapse two physical nodes into one bucket.
 	var nodeRow model.Node
 	db.Select("guid").Where("id = ?", nodeID).First(&nodeRow)
-	guidShared := false
-	if nodeRow.Guid != "" {
-		var sameGuid int64
-		db.Model(&model.Node{}).Where("guid = ?", nodeRow.Guid).Count(&sameGuid)
-		guidShared = sameGuid > 1
-	}
-	selfKey := nodeRow.Guid
-	if selfKey == "" || guidShared {
-		selfKey = synthNodeGuid(nodeID)
-	}
+	selfKey := effectiveNodeKey(&model.Node{Id: nodeID, Guid: nodeRow.Guid})
+	guidShared := nodeRow.Guid != "" && selfKey != nodeRow.Guid
 	originGuidFor := func(snapIb *model.Inbound) string {
 		if snapIb.OriginNodeGuid != "" {
 			return snapIb.OriginNodeGuid

+ 31 - 7
internal/web/service/inbound_node_ips.go

@@ -113,8 +113,26 @@ func (s *InboundService) RecordLocalClientIps(panelGuid string, observed map[str
 
 // MergeClientIpsByGuid folds a node's guid-keyed attribution report (its own
 // panelGuid subtree plus any descendants) into the local table, preserving which
-// physical node each IP is on across a chain.
-func (s *InboundService) MergeClientIpsByGuid(trees map[string]map[string][]model.ClientIpEntry) error {
+// physical node each IP is on across a chain. When node is non-nil and its own
+// panelGuid is ambiguous (shared with another node or the master — a cloned
+// server), the node's own subtree is remapped to its node-unique key so two
+// clones don't collapse into one attribution row; descendant subtrees keep their
+// distinct GUIDs. A nil node merges the report verbatim.
+func (s *InboundService) MergeClientIpsByGuid(node *model.Node, trees map[string]map[string][]model.ClientIpEntry) error {
+	if node != nil && node.Guid != "" {
+		if eff := effectiveNodeKey(node); eff != node.Guid {
+			if sub, ok := trees[node.Guid]; ok {
+				delete(trees, node.Guid)
+				if existing, ok := trees[eff]; ok {
+					for email, ips := range sub {
+						existing[email] = append(existing[email], ips...)
+					}
+				} else {
+					trees[eff] = sub
+				}
+			}
+		}
+	}
 	for guid, perEmail := range trees {
 		if err := upsertNodeClientIps(guid, perEmail); err != nil {
 			return err
@@ -242,18 +260,24 @@ func (s *InboundService) GetClientIpsWithNodes(email string) ([]ClientIpInfo, er
 	return out, nil
 }
 
-// nodeGuidNameMap maps each known node's stable guid to its display name.
+// nodeGuidNameMap maps each known node's attribution key to its display name,
+// keyed by effectiveNodeGuid so a cloned node's IPs (stored under its node-unique
+// key) still resolve to the right name instead of colliding under a shared GUID.
 func (s *InboundService) nodeGuidNameMap() map[string]string {
 	db := database.GetDB()
 	var nodes []model.Node
 	if err := db.Model(&model.Node{}).Find(&nodes).Error; err != nil {
 		return map[string]string{}
 	}
+	ptrs := make([]*model.Node, len(nodes))
+	for i := range nodes {
+		ptrs[i] = &nodes[i]
+	}
+	selfGuid, _ := (&SettingService{}).GetPanelGuid()
+	ambiguous := ambiguousNodeGuids(ptrs, selfGuid)
 	m := make(map[string]string, len(nodes))
-	for _, n := range nodes {
-		if n.Guid != "" {
-			m[n.Guid] = n.Name
-		}
+	for i := range nodes {
+		m[effectiveNodeGuid(&nodes[i], ambiguous)] = nodes[i].Name
 	}
 	return m
 }

+ 2 - 2
internal/web/service/inbound_node_ips_test.go

@@ -103,7 +103,7 @@ func TestGetClientIpNodeAttribution_NewestGuidWins(t *testing.T) {
 	}); err != nil {
 		t.Fatalf("record gA: %v", err)
 	}
-	if err := svc.MergeClientIpsByGuid(map[string]map[string][]model.ClientIpEntry{
+	if err := svc.MergeClientIpsByGuid(nil, map[string]map[string][]model.ClientIpEntry{
 		"gB": {"u@x": {{IP: "9.9.9.9", Timestamp: now}}},
 	}); err != nil {
 		t.Fatalf("merge gB: %v", err)
@@ -145,7 +145,7 @@ func TestGetClientIpsWithNodes_LabelsNodes(t *testing.T) {
 	}); err != nil {
 		t.Fatalf("record local: %v", err)
 	}
-	if err := svc.MergeClientIpsByGuid(map[string]map[string][]model.ClientIpEntry{
+	if err := svc.MergeClientIpsByGuid(nil, map[string]map[string][]model.ClientIpEntry{
 		"node-guid": {"u@x": {{IP: "2.2.2.2", Timestamp: now}}},
 	}); err != nil {
 		t.Fatalf("merge node: %v", err)

+ 78 - 19
internal/web/service/node.go

@@ -14,6 +14,7 @@ import (
 	"slices"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/mhsanaei/3x-ui/v3/internal/database"
@@ -192,14 +193,15 @@ func (s *NodeService) GetAll() ([]*model.Node, error) {
 		}
 	}
 	onlineByGuid := s.onlineEmailsByGuid()
-	shared := sharedNodeGuids(nodes)
+	selfGuid, _ := (&SettingService{}).GetPanelGuid()
+	ambiguous := ambiguousNodeGuids(nodes, selfGuid)
 	for _, n := range nodes {
 		n.InboundCount = len(inboundsByNode[n.Id])
 		n.DepletedCount = depletedByNode[n.Id]
 		// Online is attributed to the node that physically hosts the client
 		// (by GUID): a client on a sub-node counts under the sub-node, not
 		// the intermediate node it syncs through (#4983).
-		n.OnlineCount = len(onlineByGuid[effectiveNodeGuid(n, shared)])
+		n.OnlineCount = len(onlineByGuid[effectiveNodeGuid(n, ambiguous)])
 	}
 
 	return nodes, nil
@@ -221,39 +223,67 @@ func (s *NodeService) onlineEmailsByGuid() map[string]map[string]struct{} {
 
 // effectiveNodeGuid is a node's stable online/inbound attribution key: its
 // reported panelGuid, or a master-local synthetic node-id fallback when the node
-// has no GUID yet (old build) or shares its GUID with another direct node. The
-// shared case is a cloned server — the panelGuid is copied with the disk image —
-// where an identical GUID would otherwise collapse two physical nodes into one
-// #4983 attribution bucket. shared comes from sharedNodeGuids.
-func effectiveNodeGuid(n *model.Node, shared map[string]struct{}) string {
+// has no GUID yet (old build) or its GUID is ambiguous. ambiguous comes from
+// ambiguousNodeGuids.
+func effectiveNodeGuid(n *model.Node, ambiguous map[string]struct{}) string {
 	if n.Guid == "" {
 		return synthNodeGuid(n.Id)
 	}
 	if n.Id > 0 {
-		if _, dup := shared[n.Guid]; dup {
+		if _, bad := ambiguous[n.Guid]; bad {
 			return synthNodeGuid(n.Id)
 		}
 	}
 	return n.Guid
 }
 
-// sharedNodeGuids returns the panelGuids reported by more than one of this
-// master's own direct nodes (Id > 0). Transitive sub-nodes (Id 0) carry distinct
-// descendant GUIDs by construction and are excluded.
-func sharedNodeGuids(nodes []*model.Node) map[string]struct{} {
+// ambiguousNodeGuids returns the panelGuids a node must not be attributed under
+// directly, because doing so would merge two distinct identities: a GUID
+// reported by more than one of this master's direct nodes (cloned node servers
+// ship the same panelGuid in their copied settings), or a GUID equal to the
+// master's own panelGuid (a node cloned from the master). A node holding such a
+// GUID falls back to its node-unique synthNodeGuid. Transitive sub-nodes (Id 0)
+// carry distinct descendant GUIDs by construction and are excluded.
+func ambiguousNodeGuids(nodes []*model.Node, selfGuid string) map[string]struct{} {
 	counts := make(map[string]int, len(nodes))
 	for _, n := range nodes {
 		if n.Id > 0 && n.Guid != "" {
 			counts[n.Guid]++
 		}
 	}
-	shared := make(map[string]struct{})
+	ambiguous := make(map[string]struct{})
 	for guid, c := range counts {
 		if c > 1 {
-			shared[guid] = struct{}{}
+			ambiguous[guid] = struct{}{}
 		}
 	}
-	return shared
+	if selfGuid != "" {
+		if _, ok := counts[selfGuid]; ok {
+			ambiguous[selfGuid] = struct{}{}
+		}
+	}
+	return ambiguous
+}
+
+// effectiveNodeKey returns one node's attribution key without a preloaded node
+// list — its panelGuid when that GUID uniquely identifies it among the master's
+// nodes and differs from the master's own, otherwise its node-unique
+// synthNodeGuid. Same rule as effectiveNodeGuid + ambiguousNodeGuids, for the
+// write paths that handle a single node (online tree, IP attribution).
+func effectiveNodeKey(node *model.Node) string {
+	if node == nil {
+		return ""
+	}
+	if node.Guid == "" {
+		return synthNodeGuid(node.Id)
+	}
+	var sameGuid int64
+	database.GetDB().Model(&model.Node{}).Where("guid = ?", node.Guid).Count(&sameGuid)
+	masterGuid, _ := (&SettingService{}).GetPanelGuid()
+	if sameGuid > 1 || node.Guid == masterGuid {
+		return synthNodeGuid(node.Id)
+	}
+	return node.Guid
 }
 
 func (s *NodeService) GetById(id int) (*model.Node, error) {
@@ -484,7 +514,9 @@ func (s *NodeService) Delete(id int) error {
 		return common.NewError(fmt.Sprintf("cannot delete node: %d inbound(s) still attached to it; detach or delete them first", attached))
 	}
 	// Capture the node's guid before deleting the row so we can drop its per-node
-	// IP attribution (NodeClientIp is keyed by guid, not node id).
+	// IP attribution. NodeClientIp is keyed by the node's attribution key, which
+	// is its guid normally but its node-unique key for a cloned/ambiguous-guid
+	// node (see effectiveNodeKey) — so we purge both below.
 	var guid string
 	var n model.Node
 	if err := db.Select("guid").Where("id = ?", id).First(&n).Error; err == nil {
@@ -498,10 +530,12 @@ func (s *NodeService) Delete(id int) error {
 		if err := tx.Where("node_id = ?", id).Delete(&model.NodeClientTraffic{}).Error; err != nil {
 			return err
 		}
+		guids := []string{synthNodeGuid(id)}
 		if guid != "" {
-			if err := tx.Where("node_guid = ?", guid).Delete(&model.NodeClientIp{}).Error; err != nil {
-				return err
-			}
+			guids = append(guids, guid)
+		}
+		if err := tx.Where("node_guid IN ?", guids).Delete(&model.NodeClientIp{}).Error; err != nil {
+			return err
 		}
 		return tx.Where("id = ?", id).Delete(&model.Node{}).Error
 	}); err != nil {
@@ -621,6 +655,7 @@ func (s *NodeService) UpdateHeartbeat(id int, p HeartbeatPatch) error {
 	// failed probe) reports none, so the stable identity survives blips.
 	if p.Guid != "" {
 		updates["guid"] = p.Guid
+		s.warnOnDuplicateGuid(id, p.Guid)
 	}
 	if err := db.Model(model.Node{}).Where("id = ?", id).Updates(updates).Error; err != nil {
 		return err
@@ -635,6 +670,30 @@ func (s *NodeService) UpdateHeartbeat(id int, p HeartbeatPatch) error {
 	return nil
 }
 
+// warnedDupGuid remembers the (nodeID -> guid) pairs already warned about so a
+// cloned-server collision is logged once, not every heartbeat.
+var warnedDupGuid sync.Map
+
+// warnOnDuplicateGuid logs once when a node reports a panelGuid already held by
+// another node or by the master itself (the cloned-server footgun). Attribution
+// still works — it falls back to node-unique keys — but the operator should
+// regenerate the duplicate panelGuid to restore real identity and per-node IP
+// attribution. Re-arms if the collision later clears.
+func (s *NodeService) warnOnDuplicateGuid(id int, guid string) {
+	var clash int64
+	database.GetDB().Model(&model.Node{}).Where("guid = ? AND id <> ?", guid, id).Count(&clash)
+	masterGuid, _ := (&SettingService{}).GetPanelGuid()
+	if clash == 0 && guid != masterGuid {
+		warnedDupGuid.Delete(id)
+		return
+	}
+	if prev, ok := warnedDupGuid.Load(id); ok && prev == guid {
+		return
+	}
+	warnedDupGuid.Store(id, guid)
+	logger.Warningf("node %d reports panelGuid %s already used by another node or the master (cloned server?) — regenerate it on that node so online and IP attribution stay per-node", id, guid)
+}
+
 func (s *NodeService) MarkNodeDirty(id int) error {
 	if id <= 0 {
 		return nil

+ 90 - 14
internal/web/service/node_shared_guid_test.go

@@ -1,34 +1,41 @@
 package service
 
 import (
+	"fmt"
 	"testing"
+	"time"
 
 	"github.com/mhsanaei/3x-ui/v3/internal/database"
 	"github.com/mhsanaei/3x-ui/v3/internal/database/model"
 )
 
-// Cloned node servers ship an identical panelGuid in their copied settings.
-// effectiveNodeGuid must keep each physical node in its own attribution bucket
-// by falling back to the node-unique id when a GUID is shared, while leaving a
-// uniquely-identified node on its real GUID.
-func TestEffectiveNodeGuid_DisambiguatesSharedGuids(t *testing.T) {
+// Cloned node servers ship an identical panelGuid in their copied settings, and
+// a node cloned from the master shares the master's own GUID. effectiveNodeGuid
+// must keep each physical node in its own attribution bucket by falling back to
+// the node-unique id for both collision kinds, while leaving a uniquely-named
+// node on its real GUID and never folding transitive (Id 0) nodes.
+func TestEffectiveNodeGuid_DisambiguatesAmbiguousGuids(t *testing.T) {
 	nodes := []*model.Node{
 		{Id: 1, Guid: "dup"},
 		{Id: 2, Guid: "dup"},
 		{Id: 3, Guid: "uniq"},
 		{Id: 4, Guid: ""},
+		{Id: 5, Guid: "master"},
 		{Id: 0, Guid: "transitive"},
 	}
-	shared := sharedNodeGuids(nodes)
+	ambiguous := ambiguousNodeGuids(nodes, "master")
 
-	if _, ok := shared["dup"]; !ok {
-		t.Fatalf("dup must be flagged shared, got %v", shared)
+	if _, ok := ambiguous["dup"]; !ok {
+		t.Fatalf("dup must be flagged ambiguous, got %v", ambiguous)
 	}
-	if _, ok := shared["uniq"]; ok {
-		t.Fatalf("uniq must not be shared, got %v", shared)
+	if _, ok := ambiguous["master"]; !ok {
+		t.Fatalf("a node sharing the master GUID must be flagged, got %v", ambiguous)
 	}
-	if _, ok := shared["transitive"]; ok {
-		t.Fatalf("transitive (Id 0) must not count toward sharing, got %v", shared)
+	if _, ok := ambiguous["uniq"]; ok {
+		t.Fatalf("uniq must not be flagged, got %v", ambiguous)
+	}
+	if _, ok := ambiguous["transitive"]; ok {
+		t.Fatalf("transitive (Id 0) must not count, got %v", ambiguous)
 	}
 
 	cases := map[*model.Node]string{
@@ -36,15 +43,50 @@ func TestEffectiveNodeGuid_DisambiguatesSharedGuids(t *testing.T) {
 		nodes[1]: "node:2",
 		nodes[2]: "uniq",
 		nodes[3]: "node:4",
-		nodes[4]: "transitive",
+		nodes[4]: "node:5",
+		nodes[5]: "transitive",
 	}
 	for n, want := range cases {
-		if got := effectiveNodeGuid(n, shared); got != want {
+		if got := effectiveNodeGuid(n, ambiguous); got != want {
 			t.Errorf("effectiveNodeGuid(Id=%d, Guid=%q) = %q, want %q", n.Id, n.Guid, got, want)
 		}
 	}
 }
 
+// effectiveNodeKey (the no-preloaded-list variant used by the write paths) must
+// agree with the slice helper: fall back to the node-unique id when a GUID is
+// shared with another node or with the master, else keep the real GUID.
+func TestEffectiveNodeKey_FallsBackOnCollision(t *testing.T) {
+	setupConflictDB(t)
+	db := database.GetDB()
+	selfGuid, _ := (&SettingService{}).GetPanelGuid()
+	if selfGuid == "" {
+		t.Fatal("expected a panel guid")
+	}
+
+	mk := func(id int, name, guid string) *model.Node {
+		n := &model.Node{Id: id, Name: name, Address: fmt.Sprintf("10.0.0.%d", id), Port: 2053, ApiToken: "t", Guid: guid, Status: "online"}
+		if err := db.Create(n).Error; err != nil {
+			t.Fatalf("create %s: %v", name, err)
+		}
+		return n
+	}
+	dupA := mk(1, "a", "shared")
+	mk(2, "b", "shared")
+	uniq := mk(3, "c", "solo")
+	masterClone := mk(4, "d", selfGuid)
+
+	if got := effectiveNodeKey(dupA); got != "node:1" {
+		t.Errorf("node-node collision: got %q, want node:1", got)
+	}
+	if got := effectiveNodeKey(uniq); got != "solo" {
+		t.Errorf("unique node: got %q, want solo", got)
+	}
+	if got := effectiveNodeKey(masterClone); got != "node:4" {
+		t.Errorf("master collision: got %q, want node:4", got)
+	}
+}
+
 // recountByGuid must split per-node counts even when two direct nodes share a
 // GUID and their inbounds still carry that shared GUID as origin (pre-backfill).
 func TestRecountByGuid_SplitsClonedNodesWithSharedGuid(t *testing.T) {
@@ -84,3 +126,37 @@ func TestRecountByGuid_SplitsClonedNodesWithSharedGuid(t *testing.T) {
 		t.Errorf("unique node InboundCount = %d, want 1", n3.InboundCount)
 	}
 }
+
+// A cloned node's IP-attribution subtree must be stored under its node-unique
+// key, so a second clone sharing the GUID can't overwrite it in node_client_ips.
+func TestMergeClientIpsByGuid_RemapsClonedNodeSubtree(t *testing.T) {
+	setupClientIpTestDB(t)
+	db := database.GetDB()
+	svc := &InboundService{}
+	now := time.Now().Unix()
+
+	n1 := &model.Node{Id: 1, Name: "A", Address: "10.0.0.1", Port: 2053, ApiToken: "t", Guid: "dup", Status: "online"}
+	n2 := &model.Node{Id: 2, Name: "B", Address: "10.0.0.2", Port: 2053, ApiToken: "t", Guid: "dup", Status: "online"}
+	for _, n := range []*model.Node{n1, n2} {
+		if err := db.Create(n).Error; err != nil {
+			t.Fatalf("create node: %v", err)
+		}
+	}
+
+	if err := svc.MergeClientIpsByGuid(n1, map[string]map[string][]model.ClientIpEntry{
+		"dup": {"u@x": {{IP: "1.1.1.1", Timestamp: now}}},
+	}); err != nil {
+		t.Fatalf("merge n1: %v", err)
+	}
+
+	var rows []model.NodeClientIp
+	if err := db.Find(&rows).Error; err != nil {
+		t.Fatalf("load rows: %v", err)
+	}
+	if len(rows) != 1 {
+		t.Fatalf("want 1 attribution row, got %d", len(rows))
+	}
+	if rows[0].NodeGuid != "node:1" {
+		t.Errorf("cloned node IPs must be stored under node-unique key, got %q", rows[0].NodeGuid)
+	}
+}

+ 6 - 6
internal/web/service/node_tree.go

@@ -174,7 +174,7 @@ func (s *NodeService) recountByGuid(nodes []*model.Node, selfGuid string) {
 	if err := db.Table("inbounds").Select("id, node_id, origin_node_guid").Scan(&ibRows).Error; err != nil {
 		return
 	}
-	shared := sharedNodeGuids(nodes)
+	ambiguous := ambiguousNodeGuids(nodes, selfGuid)
 	effByInbound := make(map[int]string, len(ibRows))
 	inboundCountByGuid := make(map[string]int)
 	ids := make([]int, 0, len(ibRows))
@@ -187,10 +187,10 @@ func (s *NodeService) recountByGuid(nodes []*model.Node, selfGuid string) {
 				guid = selfGuid
 			}
 		} else if r.NodeID != nil {
-			// Origin still holds a GUID two direct nodes share (cloned server,
-			// not yet re-attributed): bucket under the hosting node's unique id
-			// so the clones don't merge.
-			if _, dup := shared[guid]; dup {
+			// Origin still holds an ambiguous GUID (cloned server / master-shared,
+			// not yet re-attributed): bucket under the hosting node's unique id so
+			// the clones don't merge.
+			if _, bad := ambiguous[guid]; bad {
 				guid = synthNodeGuid(*r.NodeID)
 			}
 		}
@@ -230,7 +230,7 @@ func (s *NodeService) recountByGuid(nodes []*model.Node, selfGuid string) {
 
 	onlineByGuid := s.onlineEmailsByGuid()
 	for _, n := range nodes {
-		guid := effectiveNodeGuid(n, shared)
+		guid := effectiveNodeGuid(n, ambiguous)
 		n.InboundCount = inboundCountByGuid[guid]
 		n.OnlineCount = len(onlineByGuid[guid])
 		n.DepletedCount = depletedByGuid[guid]