Browse Source

fix(iplimit): populate client IP log without an IP limit

The per-client IP log was only filled as a side effect of IP-limit enforcement: Run() scraped the access log only when some client had limitIp>0, so installs without a limit always showed an empty IP log (#4800).

Decouple collection from enforcement: scrape the access log whenever it is available and thread an enforce flag through processLogFile/updateInboundClientIps so banning still only happens for limited clients. The XUI_ENABLE_FAIL2BAN kill-switch is preserved.

Closes #4800
MHSanaei 16 hours ago
parent
commit
66d4d04776
2 changed files with 66 additions and 65 deletions
  1. 15 63
      web/job/check_client_ip_job.go
  2. 51 2
      web/job/check_client_ip_job_integration_test.go

+ 15 - 63
web/job/check_client_ip_job.go

@@ -35,23 +35,6 @@ var job *CheckClientIpJob
 
 const defaultXrayAPIPort = 62789
 
-// ipStaleAfterSeconds controls how long a client IP kept in the
-// per-client tracking table (model.InboundClientIps.Ips) is considered
-// still "active" before it's evicted during the next scan.
-//
-// Without this eviction, an IP that connected once and then went away
-// keeps sitting in the table with its old timestamp. Because the
-// excess-IP selector sorts ascending ("newest wins, oldest loses") to
-// protect the most recent connections, that stale entry keeps
-// occupying a slot and the IP that is *actually* currently using the
-// config gets classified as "new excess" and banned by fail2ban on
-// every single run — producing the continuous ban loop from #4077.
-//
-// 30 minutes is chosen so an actively-streaming client (where xray
-// emits a fresh `accepted` log line whenever it opens a new TCP) will
-// always refresh its timestamp well within the window, but a client
-// that has really stopped using the config will drop out of the table
-// in a bounded time and free its slot.
 const ipStaleAfterSeconds = int64(30 * 60)
 
 // NewCheckClientIpJob creates a new client IP monitoring job instance.
@@ -67,27 +50,20 @@ func (j *CheckClientIpJob) Run() {
 
 	shouldClearAccessLog := false
 	fail2BanEnabled := isFail2BanEnabled()
-	iplimitActive := fail2BanEnabled && j.hasLimitIp()
+	hasLimit := fail2BanEnabled && j.hasLimitIp()
 	f2bInstalled := false
-	if iplimitActive {
+	if hasLimit {
 		f2bInstalled = j.checkFail2BanInstalled()
 	}
-	isAccessLogAvailable := j.checkAccessLogAvailable(iplimitActive)
+	isAccessLogAvailable := j.checkAccessLogAvailable(hasLimit)
 
-	if isAccessLogAvailable {
-		if runtime.GOOS == "windows" {
-			if iplimitActive {
-				shouldClearAccessLog = j.processLogFile()
-			}
-		} else {
-			if iplimitActive {
-				if f2bInstalled {
-					shouldClearAccessLog = j.processLogFile()
-				} else {
-					logger.Warning("[LimitIP] Fail2Ban is not installed, Please install Fail2Ban from the x-ui bash menu.")
-				}
-			}
+	if fail2BanEnabled && isAccessLogAvailable {
+		enforce := hasLimit
+		if hasLimit && runtime.GOOS != "windows" && !f2bInstalled {
+			logger.Warning("[LimitIP] Fail2Ban is not installed, Please install Fail2Ban from the x-ui bash menu.")
+			enforce = false
 		}
+		shouldClearAccessLog = j.processLogFile(enforce)
 	}
 
 	if shouldClearAccessLog || (isAccessLogAvailable && time.Now().Unix()-j.lastClear > 3600) {
@@ -145,7 +121,7 @@ func (j *CheckClientIpJob) hasLimitIp() bool {
 	return false
 }
 
-func (j *CheckClientIpJob) processLogFile() bool {
+func (j *CheckClientIpJob) processLogFile(enforce bool) bool {
 
 	ipRegex := regexp.MustCompile(`from (?:tcp:|udp:)?\[?([0-9a-fA-F\.:]+)\]?:\d+ accepted`)
 	emailRegex := regexp.MustCompile(`email: (.+)$`)
@@ -220,18 +196,12 @@ func (j *CheckClientIpJob) processLogFile() bool {
 			continue
 		}
 
-		shouldCleanLog = j.updateInboundClientIps(clientIpsRecord, email, ipsWithTime) || shouldCleanLog
+		shouldCleanLog = j.updateInboundClientIps(clientIpsRecord, email, ipsWithTime, enforce) || shouldCleanLog
 	}
 
 	return shouldCleanLog
 }
 
-// mergeClientIps combines the persisted (old) and freshly observed (new)
-// IP-with-timestamp lists for a single client into a map. An entry is
-// dropped if its last-seen timestamp is older than staleCutoff.
-//
-// Extracted as a helper so updateInboundClientIps can stay DB-oriented
-// and the merge policy can be exercised by a unit test.
 func mergeClientIps(old, new []IPWithTimestamp, staleCutoff int64) map[string]int64 {
 	ipMap := make(map[string]int64, len(old)+len(new))
 	for _, ipTime := range old {
@@ -251,19 +221,6 @@ func mergeClientIps(old, new []IPWithTimestamp, staleCutoff int64) map[string]in
 	return ipMap
 }
 
-// partitionLiveIps splits the merged ip map into live (seen in the
-// current scan) and historical (only in the db blob, still inside the
-// staleness window).
-//
-// only live ips count toward the per-client limit. historical ones stay
-// in the db so the panel keeps showing them, but they must not take a
-// live slot or get re-banned. the 30min cutoff alone isn't tight enough:
-// an ip that stopped connecting a few minutes ago still looks fresh to
-// mergeClientIps, and without this split it would keep triggering
-// fail2ban even though it isn't currently connected. see #4077 / #4091.
-//
-// live is sorted ascending by timestamp (oldest → newest), so we keep
-// the most recent entries at the end of the slice (last IP wins).
 func partitionLiveIps(ipMap map[string]int64, observedThisScan map[string]bool) (live, historical []IPWithTimestamp) {
 	live = make([]IPWithTimestamp, 0, len(observedThisScan))
 	historical = make([]IPWithTimestamp, 0, len(ipMap))
@@ -354,7 +311,7 @@ func (j *CheckClientIpJob) addInboundClientIps(clientEmail string, ipsWithTime [
 	return nil
 }
 
-func (j *CheckClientIpJob) updateInboundClientIps(inboundClientIps *model.InboundClientIps, clientEmail string, newIpsWithTime []IPWithTimestamp) bool {
+func (j *CheckClientIpJob) updateInboundClientIps(inboundClientIps *model.InboundClientIps, clientEmail string, newIpsWithTime []IPWithTimestamp, enforce bool) bool {
 	// Get the inbound configuration
 	inbound, err := j.getInboundByEmail(clientEmail)
 	if err != nil {
@@ -382,8 +339,9 @@ func (j *CheckClientIpJob) updateInboundClientIps(inboundClientIps *model.Inboun
 		}
 	}
 
-	if !clientFound || limitIp <= 0 || !inbound.Enable {
-		// No limit or inbound disabled, just update and return
+	if !enforce || !clientFound || limitIp <= 0 || !inbound.Enable {
+		// Nothing to enforce (collection-only run, no limit, client missing, or
+		// inbound disabled): record the observed IPs for the panel and return.
 		jsonIps, _ := json.Marshal(newIpsWithTime)
 		inboundClientIps.Ips = string(jsonIps)
 		db := database.GetDB()
@@ -397,8 +355,6 @@ func (j *CheckClientIpJob) updateInboundClientIps(inboundClientIps *model.Inboun
 		json.Unmarshal([]byte(inboundClientIps.Ips), &oldIpsWithTime)
 	}
 
-	// Merge old and new IPs, evicting entries that haven't been
-	// re-observed in a while. See mergeClientIps / #4077 for why.
 	ipMap := mergeClientIps(oldIpsWithTime, newIpsWithTime, time.Now().Unix()-ipStaleAfterSeconds)
 
 	// only ips seen in this scan count toward the limit. see
@@ -422,10 +378,6 @@ func (j *CheckClientIpJob) updateInboundClientIps(inboundClientIps *model.Inboun
 		keptLive = liveIps[cutoff:]
 		bannedLive := liveIps[:cutoff]
 
-		// Open log file only when a ban entry needs to be written.
-		// Use a local logger to avoid mutating the global log.* state,
-		// which would redirect all standard-library logging to this file
-		// and leave a dangling closed-file handle after the defer fires.
 		logIpFile, err := os.OpenFile(xray.GetIPLimitLogPath(), os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
 		if err != nil {
 			logger.Errorf("failed to open IP limit log file: %s", err)

+ 51 - 2
web/job/check_client_ip_job_integration_test.go

@@ -195,7 +195,7 @@ func TestUpdateInboundClientIps_LiveIpNotBannedByStillFreshHistoricals(t *testin
 		{IP: "128.71.1.1", Timestamp: now},
 	}
 
-	shouldCleanLog := j.updateInboundClientIps(row, email, live)
+	shouldCleanLog := j.updateInboundClientIps(row, email, live, true)
 
 	if shouldCleanLog {
 		t.Fatalf("shouldCleanLog must be false, nothing should have been banned with 1 live ip under limit 3")
@@ -244,7 +244,7 @@ func TestUpdateInboundClientIps_ExcessLiveIpIsStillBanned(t *testing.T) {
 		{IP: "192.0.2.9", Timestamp: now},
 	}
 
-	shouldCleanLog := j.updateInboundClientIps(row, email, live)
+	shouldCleanLog := j.updateInboundClientIps(row, email, live, true)
 
 	if !shouldCleanLog {
 		t.Fatalf("shouldCleanLog must be true when the live set exceeds the limit")
@@ -272,6 +272,55 @@ func TestUpdateInboundClientIps_ExcessLiveIpIsStillBanned(t *testing.T) {
 	}
 }
 
+// writeXrayAccessLog points bin/config.json at a fresh access.log holding a
+// single default-format Xray line (`from tcp:<ip>:<port> accepted … email: <e>`)
+// for the given client, so Run() has something to scrape.
+func writeXrayAccessLog(t *testing.T, email, ip string) {
+	t.Helper()
+	binDir := t.TempDir()
+	accessLog := filepath.Join(t.TempDir(), "access.log")
+	t.Setenv("XUI_BIN_FOLDER", binDir)
+	configData, err := json.Marshal(map[string]any{
+		"log": map[string]any{"access": accessLog},
+	})
+	if err != nil {
+		t.Fatalf("marshal xray config: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(binDir, "config.json"), configData, 0644); err != nil {
+		t.Fatalf("write xray config: %v", err)
+	}
+	line := "2026/06/02 13:35:53 from tcp:" + ip + ":2387 accepted tcp:example.com:443 email: " + email + "\n"
+	if err := os.WriteFile(accessLog, []byte(line), 0644); err != nil {
+		t.Fatalf("write access log: %v", err)
+	}
+}
+
+// #4800: the per-client IP log must populate even when no client has an IP
+// limit. Before the fix, Run() only scraped the access log when an IP limit
+// was active, so a limit-free install always showed an empty IP log despite
+// valid access-log lines. No ban may be written since there's no limit.
+func TestRun_CollectsIpsWithoutLimit(t *testing.T) {
+	setupIntegrationDB(t)
+	t.Setenv("XUI_ENABLE_FAIL2BAN", "true")
+	fakeFail2BanClient(t)
+
+	const email = "no-limit-user"
+	seedInboundWithClient(t, "inbound-no-limit", email, 0) // limitIp = 0
+	writeXrayAccessLog(t, email, "203.0.113.10")
+
+	NewCheckClientIpJob().Run()
+
+	ips := readClientIps(t, email)
+	if len(ips) != 1 || ips[0].IP != "203.0.113.10" {
+		t.Fatalf("expected the access-log IP to be collected without a limit, got %v", ips)
+	}
+
+	if info, err := os.Stat(readIpLimitLogPath()); err == nil && info.Size() > 0 {
+		body, _ := os.ReadFile(readIpLimitLogPath())
+		t.Fatalf("3xipl.log should be empty with no limit set, got:\n%s", body)
+	}
+}
+
 // readIpLimitLogPath reads the 3xipl.log path the same way the job
 // does via xray.GetIPLimitLogPath but without importing xray here
 // just for the path helper (which would pull a lot more deps into the