diff --git a/README.md b/README.md index 475a5ef..a71fc23 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Use this only on systems you own or are explicitly authorized to administer. ## Contents +- [What's New in 1.8.5](#whats-new-in-185) - [What's New in 1.8.0](#whats-new-in-180) - [What's New in 1.7.0](#whats-new-in-170) - [What's New in 1.6.0](#whats-new-in-160) @@ -57,6 +58,28 @@ Use this only on systems you own or are explicitly authorized to administer. --- +## What's New in 1.8.5 + +A fleet **reliability and observability** release. **No database schema change since 1.8.0.** + +**Agent liveness over a named pipe** +- The Helper (updater) now reads agent liveness from the agent's read-only status pipe + (`RemoteAgent.status` → `LastHeartbeatUtc`) instead of a heartbeat file, removing a file-race that could + report a bogus multi-billion-second "stale heartbeat" and force an unnecessary agent restart. +- A two-poll confirmation keeps a single transient blip from restarting a healthy agent. The legacy + heartbeat file is still written for an older, file-based Helper during a rolling update and **self-retires** + once the co-located Helper is the new pipe-aware build. + +**Flaky-link detection (observability only)** +- The device list now tells **"alive but on a poor network"** apart from **"offline / dead"**: a device with + frequent C2 reconnects shows as **`◐ flaky`** (amber) instead of **`○ offline`** (grey), with the reconnect + count in the tooltip and a *Link* row in the telemetry panel. +- Computed **server-side** from C2 connection churn (in-memory, last hour). It is **pure observability and + never triggers a restart**, needs no schema change, and is backward/forward compatible (older clients + ignore the new field; an older server leaves it dormant). + +--- + ## What's New in 1.8.0 1.8.0 adds **agentless operator consoles for Linux and Windows** and hardens the keyless sign-in path. 
diff --git a/src/RemoteAgent.Contracts/Admin.cs b/src/RemoteAgent.Contracts/Admin.cs index 9a75490..0f79da0 100644 --- a/src/RemoteAgent.Contracts/Admin.cs +++ b/src/RemoteAgent.Contracts/Admin.cs @@ -17,6 +17,18 @@ public sealed class DeviceInfo [JsonPropertyName("online")] public bool Online { get; set; } + /// C2 (re)connections observed for this device in the last hour (server connection registry). + /// 0–1 = stable; higher = flaky link (agent likely alive, poor network), not a dead device. + [JsonPropertyName("recentReconnects")] + public int RecentReconnects { get; set; } + + /// Churn at or above this is shown as "flaky" rather than "stable". Shared display threshold. + public const int FlakyReconnectThreshold = 3; + + /// Derived display flag: the link churns enough to call it flaky. Not serialized. + [JsonIgnore] + public bool LinkFlaky => RecentReconnects >= FlakyReconnectThreshold; + [JsonPropertyName("lastSeenAt")] public DateTimeOffset? LastSeenAt { get; set; } diff --git a/src/RemoteAgent.Contracts/Status.cs b/src/RemoteAgent.Contracts/Status.cs index 1b68714..04b3cf2 100644 --- a/src/RemoteAgent.Contracts/Status.cs +++ b/src/RemoteAgent.Contracts/Status.cs @@ -39,6 +39,10 @@ public sealed class StatusReport /// Time of the last successful server contact, either C2 connection or telemetry. [JsonPropertyName("lastServerContactUtc")] public DateTimeOffset? LastServerContactUtc { get; set; } + /// Agent liveness tick, updated by the agent roughly every 15 s. The Helper reads it over this + /// status pipe to detect a hung agent (stale or missing tick), replacing the old heartbeat file. + [JsonPropertyName("lastHeartbeatUtc")] public DateTimeOffset? LastHeartbeatUtc { get; set; } + /// Local agent device ID sent by the client in login/reset requests for the device-level failure counter. [JsonPropertyName("deviceId")] public string? 
DeviceId { get; set; } } diff --git a/src/RemoteAgent.Updater/Localization/String.en.cs b/src/RemoteAgent.Updater/Localization/String.en.cs index d3b86fc..4ef6f4e 100644 --- a/src/RemoteAgent.Updater/Localization/String.en.cs +++ b/src/RemoteAgent.Updater/Localization/String.en.cs @@ -7,6 +7,7 @@ internal static partial class Strings private static readonly Dictionary En = new() { [nameof(SupervisorWorker_AgentHungHeartbeatAbout0)] = "agent hung (heartbeat about {0:F0}s old) - forced restart", + [nameof(SupervisorWorker_AgentHungNoHeartbeat)] = "agent hung (status pipe unresponsive/unreadable) - forced restart", /* raised when the status pipe yields no usable report; no heartbeat file is read any more */ [nameof(SupervisorWorker_RemoteAgentIsNotRunningState)] = "RemoteAgent is not running ({State}) - starting.", [nameof(SupervisorWorker_AgentStoppedRestarted)] = "agent stopped -> restarted", [nameof(SupervisorWorker_AgentStartFailed)] = "agent start failed", diff --git a/src/RemoteAgent.Updater/Localization/String.hu.cs b/src/RemoteAgent.Updater/Localization/String.hu.cs index d70760f..b0ea9c1 100644 --- a/src/RemoteAgent.Updater/Localization/String.hu.cs +++ b/src/RemoteAgent.Updater/Localization/String.hu.cs @@ -7,6 +7,7 @@ internal static partial class Strings private static readonly Dictionary Hu = new() { [nameof(SupervisorWorker_AgentHungHeartbeatAbout0)] = "agent beragadt (életjel ~{0:F0}s régi) — kényszerített újraindítás", + [nameof(SupervisorWorker_AgentHungNoHeartbeat)] = "agent beragadt (az állapot-pipe nem válaszol/olvashatatlan) — kényszerített újraindítás", /* raised when the status pipe yields no usable report; no heartbeat file is read any more */ [nameof(SupervisorWorker_RemoteAgentIsNotRunningState)] = "A RemoteAgent nem fut ({State}) — indítás.", [nameof(SupervisorWorker_AgentStoppedRestarted)] = "agent leállt → újraindítva", [nameof(SupervisorWorker_AgentStartFailed)] = "agent indítása sikertelen", diff --git a/src/RemoteAgent.Updater/Localization/Strings.cs b/src/RemoteAgent.Updater/Localization/Strings.cs index 6aca566..beea446 100644 --- a/src/RemoteAgent.Updater/Localization/Strings.cs +++ 
b/src/RemoteAgent.Updater/Localization/Strings.cs @@ -61,6 +61,7 @@ private static string NormalizeLanguageCode(string? langCode) } public static string SupervisorWorker_AgentHungHeartbeatAbout0 => Get(nameof(SupervisorWorker_AgentHungHeartbeatAbout0)); + public static string SupervisorWorker_AgentHungNoHeartbeat => Get(nameof(SupervisorWorker_AgentHungNoHeartbeat)); public static string SupervisorWorker_RemoteAgentIsNotRunningState => Get(nameof(SupervisorWorker_RemoteAgentIsNotRunningState)); public static string SupervisorWorker_AgentStoppedRestarted => Get(nameof(SupervisorWorker_AgentStoppedRestarted)); public static string SupervisorWorker_AgentStartFailed => Get(nameof(SupervisorWorker_AgentStartFailed)); diff --git a/src/RemoteAgent.Updater/RemoteAgent.Updater.csproj b/src/RemoteAgent.Updater/RemoteAgent.Updater.csproj index 4e4453f..a986660 100644 --- a/src/RemoteAgent.Updater/RemoteAgent.Updater.csproj +++ b/src/RemoteAgent.Updater/RemoteAgent.Updater.csproj @@ -7,7 +7,7 @@ RemoteAgent.Updater RemoteAgent.Updater Exe - 1.8.0.0 + 1.8.5.0 ..\..\icon\app.ico diff --git a/src/RemoteAgent.Updater/SupervisorWorker.cs b/src/RemoteAgent.Updater/SupervisorWorker.cs index 65da998..2e6a71a 100644 --- a/src/RemoteAgent.Updater/SupervisorWorker.cs +++ b/src/RemoteAgent.Updater/SupervisorWorker.cs @@ -1,8 +1,10 @@ using System.Diagnostics; -using System.Globalization; +using System.IO.Pipes; using System.Text.Json; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; +using RemoteAgent.Admin; +using RemoteAgent.Commands; using L = RemoteAgent.Updater.Localization.Strings; namespace RemoteAgent.Updater; @@ -15,16 +17,16 @@ namespace RemoteAgent.Updater; /// service, replaces the executable, and restarts it. A running service cannot /// replace its own binary, so this lives in a separate executable/service. /// -/// 2) WATCHDOG: watches the agent heartbeat file -/// (<ProgramData>\RemoteAgent\agent.heartbeat). 
+/// 2) WATCHDOG: checks the agent's liveness over its read-only status named pipe +/// ("RemoteAgent.status", StatusReport.LastHeartbeatUtc). /// - if the service is not running, it tries to start it; -/// - if the service appears running but the heartbeat is stale, the agent is hung -/// (SCM only sees process exit): stop, kill by PID if it does not stop in time, -/// then restart. +/// - if the service appears running but the pipe is unresponsive or the heartbeat tick is +/// stale, the agent is hung (SCM only sees process exit): stop, kill by PID if it does not +/// stop in time, then restart. /// Backoff and circuit breaker prevent a tight failure loop; reboot is the natural reset. /// -/// The Helper has no network or command authority. It only reacts to local markers and -/// heartbeat files. Only the authenticated Agent talks to the server. Incidents are +/// The Helper has no network or command authority. It only reacts to local update markers and the +/// agent's status pipe. Only the authenticated Agent talks to the server. Incidents are /// written to a local status file and uploaded by the Agent as telemetry. 
/// public sealed class SupervisorWorker(ILogger logger) : BackgroundService @@ -34,11 +36,13 @@ public sealed class SupervisorWorker(ILogger logger) : Backgro private static readonly string DataDir = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData), "RemoteAgent"); private static readonly string UpdateDir = Path.Combine(DataDir, "update"); - private static readonly string HeartbeatFile = Path.Combine(DataDir, "agent.heartbeat"); private static readonly string StatusFile = Path.Combine(DataDir, "supervisor.status"); + private const string StatusPipeName = "RemoteAgent.status"; private static readonly TimeSpan Poll = TimeSpan.FromSeconds(10); private static readonly TimeSpan HeartbeatStale = TimeSpan.FromSeconds(90); + private const int HungConfirmPolls = 2; // consecutive unhealthy polls required before a forced restart + private const int PipeConnectTimeoutMs = 5000; // the status pipe must answer within this, else the agent is treated as hung private static readonly TimeSpan StartGrace = TimeSpan.FromSeconds(60); // do not judge hang immediately after start private static readonly TimeSpan StopTimeout = TimeSpan.FromSeconds(20); // graceful stop window before killing private const int MaxConsecutiveFailures = 5; @@ -49,6 +53,7 @@ public sealed class SupervisorWorker(ILogger logger) : Backgro private DateTimeOffset _lastAgentAction = DateTimeOffset.UtcNow; private DateTimeOffset _parkedUntil = DateTimeOffset.MinValue; private int _consecutiveFailures; + private int _unhealthyPolls; // consecutive polls with a stale/missing heartbeat (transient-blip filter) private int _agentRestarts; private string? 
_lastIncident; @@ -91,21 +96,30 @@ private async Task WatchdogAsync(CancellationToken ct) if (DateTimeOffset.UtcNow - _lastAgentAction < StartGrace) return; - var age = HeartbeatAge(); - if (age <= HeartbeatStale) + var age = await HeartbeatAgeAsync(ct); + if (age is { } fresh && fresh <= HeartbeatStale) { - // Healthy means both running and heartbeat present; only this resets failure state. + // Healthy means both running and a recent heartbeat; only this resets failure state. + _unhealthyPolls = 0; _consecutiveFailures = 0; _parkedUntil = DateTimeOffset.MinValue; return; } + // A single missed or stale reading can be a transient blip on the status pipe rather than a hang; + // only act once it stays unhealthy across HungConfirmPolls consecutive polls. + if (++_unhealthyPolls < HungConfirmPolls) + return; + // Running but silent means hung. When parked, do not hammer SCM. if (DateTimeOffset.UtcNow < _parkedUntil) return; - _lastIncident = L.Format(L.SupervisorWorker_AgentHungHeartbeatAbout0, age.TotalSeconds); + _lastIncident = age is { } stale + ? L.Format(L.SupervisorWorker_AgentHungHeartbeatAbout0, stale.TotalSeconds) + : L.SupervisorWorker_AgentHungNoHeartbeat; logger.LogWarning("{Incident}", _lastIncident); + _unhealthyPolls = 0; await RestartHungAgentAsync(ct); await RegisterFailureAsync(); // hung-service churn should also trip the breaker return; @@ -154,18 +168,27 @@ private async Task RegisterFailureAsync() await WriteStatusAsync(); } - private static TimeSpan HeartbeatAge() + /// Agent liveness age read over the status named pipe (now - StatusReport.LastHeartbeatUtc). + /// Null when the pipe does not connect and deliver a parseable report within PipeConnectTimeoutMs. An agent that + /// serves the pipe but reports no heartbeat field counts as fresh (TimeSpan.Zero), since the pipe answered. 
+ private static async Task HeartbeatAgeAsync(CancellationToken ct) { try { - if (!File.Exists(HeartbeatFile)) return TimeSpan.MaxValue; - var txt = File.ReadAllText(HeartbeatFile).Trim(); - if (DateTimeOffset.TryParse(txt, CultureInfo.InvariantCulture, - DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var ts)) - return DateTimeOffset.UtcNow - ts; - return DateTimeOffset.UtcNow - File.GetLastWriteTimeUtc(HeartbeatFile); // fallback + // One deadline covers connect AND read: if the agent accepts the connection but never writes or closes + // the pipe, an unbounded read would block this watchdog and the hung agent would never be restarted. + using var deadline = CancellationTokenSource.CreateLinkedTokenSource(ct); + deadline.CancelAfter(PipeConnectTimeoutMs); + await using var pipe = new NamedPipeClientStream(".", StatusPipeName, PipeDirection.In, PipeOptions.Asynchronous); + await pipe.ConnectAsync(deadline.Token); + using var ms = new MemoryStream(); + await pipe.CopyToAsync(ms, deadline.Token); + if (ms.Length == 0) return null; + var report = JsonSerializer.Deserialize(ms.ToArray(), AgentJsonContext.Default.StatusReport); + if (report is null) return null; + return report.LastHeartbeatUtc is { } beat ? DateTimeOffset.UtcNow - beat : TimeSpan.Zero; } - catch { return TimeSpan.MaxValue; } + catch { return null; } // pipe unavailable / connect or read timed out / bad payload = agent not serving = hung } // ---------------- UPDATE SWAP ---------------- diff --git a/src/RemoteAgent/RemoteAgent.csproj b/src/RemoteAgent/RemoteAgent.csproj index 31d2f8c..a677a97 100644 --- a/src/RemoteAgent/RemoteAgent.csproj +++ b/src/RemoteAgent/RemoteAgent.csproj @@ -7,7 +7,7 @@ enable RemoteAgent RemoteAgent - 1.8.0.0 + 1.8.5.0 ..\..\icon\app.ico diff --git a/src/RemoteAgent/Services/AgentStatusState.cs b/src/RemoteAgent/Services/AgentStatusState.cs index 0f5e4f3..96ccaa9 100644 --- a/src/RemoteAgent/Services/AgentStatusState.cs +++ b/src/RemoteAgent/Services/AgentStatusState.cs @@ -9,6 +9,7 @@ public sealed class AgentStatusState { private volatile bool _c2Connected; private long _lastContactTicks; // DateTimeOffset.UtcNow.UtcTicks, 0 = never + private long _lastHeartbeatTicks; // agent liveness tick, 0 = never public bool C2Connected => _c2Connected; @@ -31,4 +32,17 @@ public void SetC2Connected(bool 
connected) /// Successful server communication occurred through C2 or telemetry. public void MarkServerContact() => Interlocked.Exchange(ref _lastContactTicks, DateTimeOffset.UtcNow.UtcTicks); + + /// Agent liveness tick, bumped periodically while the work loop is alive. The Helper reads it + /// over the status pipe (StatusReport.LastHeartbeatUtc) to detect a hung agent. + public DateTimeOffset? LastHeartbeatUtc + { + get + { + var t = Interlocked.Read(ref _lastHeartbeatTicks); + return t == 0 ? null : new DateTimeOffset(t, TimeSpan.Zero); + } + } + + public void Heartbeat() => Interlocked.Exchange(ref _lastHeartbeatTicks, DateTimeOffset.UtcNow.UtcTicks); } diff --git a/src/RemoteAgent/Services/HeartbeatService.cs b/src/RemoteAgent/Services/HeartbeatService.cs index 24a200a..35fc051 100644 --- a/src/RemoteAgent/Services/HeartbeatService.cs +++ b/src/RemoteAgent/Services/HeartbeatService.cs @@ -7,14 +7,17 @@ namespace RemoteAgent.Services; /// -/// Periodically updates the heartbeat file (<EnrollmentDir>\agent.heartbeat). The Helper -/// (RemoteAgent.Updater) watches it: if the heartbeat is stale while the service is "running", -/// the agent is hung. SCM cannot see that, only process exit. The Helper recovers through -/// stop, optional kill, and restart. Deliberately cheap signal: one file timestamp, no IPC. +/// Periodically bumps the agent liveness tick (), which the +/// Helper (RemoteAgent.Updater) reads over the status pipe as StatusReport.LastHeartbeatUtc: if the +/// tick is stale while the service is "running", the agent is hung. SCM cannot see that, only process +/// exit. The Helper recovers through stop, optional kill, and restart. The legacy heartbeat file +/// (<EnrollmentDir>\agent.heartbeat) is written only while the installed Helper is older than 1.8.1 +/// (file-based); once the co-located Helper is pipe-aware the agent stops writing it automatically. 
/// -public sealed class HeartbeatService(IOptions options, ILogger logger) : BackgroundService +public sealed class HeartbeatService(IOptions options, AgentStatusState status, RemoteAgent.Telemetry.SystemInfoCollector sysInfo, ILogger logger) : BackgroundService { private static readonly TimeSpan Interval = TimeSpan.FromSeconds(15); + private static readonly Version PipeAwareHelper = new(1, 8, 1, 0); // first Helper that reads liveness over the status pipe private readonly string _file = Path.Combine(options.Value.EnrollmentDir, "agent.heartbeat"); protected override async Task ExecuteAsync(CancellationToken stoppingToken) @@ -23,15 +26,27 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken) while (!stoppingToken.IsCancellationRequested) { - try + status.Heartbeat(); // primary liveness signal: the Helper reads it over the status pipe (StatusReport.LastHeartbeatUtc) + + // Legacy heartbeat file: only needed while an older, file-based Helper is installed. As soon as the + // co-located Helper is pipe-aware (>= 1.8.1) it reads the tick over the pipe, so this self-retires. + if (!HelperReadsPipe()) { - await File.WriteAllTextAsync(_file, DateTimeOffset.UtcNow.ToString("O"), stoppingToken); + try + { + await File.WriteAllTextAsync(_file, DateTimeOffset.UtcNow.ToString("O"), stoppingToken); + } + catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested) { break; } + catch (Exception ex) { logger.LogDebug(ex, L.HeartbeatService_HeartbeatWriteFailed); } } - catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested) { break; } - catch (Exception ex) { logger.LogDebug(ex, L.HeartbeatService_HeartbeatWriteFailed); } try { await Task.Delay(Interval, stoppingToken); } catch (OperationCanceledException) { break; } } } + + /// True when the installed Helper reads liveness over the status pipe (>= 1.8.1), making the legacy + /// heartbeat file redundant. 
Unknown or older version → false: keep writing the file (fail safe). + private bool HelperReadsPipe() => + Version.TryParse(sysInfo.ComponentVersions().Helper, out var v) && v >= PipeAwareHelper; } diff --git a/src/RemoteAgent/Services/StatusPipeService.cs b/src/RemoteAgent/Services/StatusPipeService.cs index 1547b33..6910d32 100644 --- a/src/RemoteAgent/Services/StatusPipeService.cs +++ b/src/RemoteAgent/Services/StatusPipeService.cs @@ -71,6 +71,7 @@ private async Task WriteStatusAsync(NamedPipeServerStream pipe, CancellationToke BastionTransport = transport.Transport, ActiveBastionPort = transport.LastWorkingPort, LastServerContactUtc = state.LastServerContactUtc, + LastHeartbeatUtc = state.LastHeartbeatUtc, Healthy = state.C2Connected, DeviceId = _deviceId, }; diff --git a/src/RemoteClient.Core/Localization/String.en.cs b/src/RemoteClient.Core/Localization/String.en.cs index 149e741..2f97ad6 100644 --- a/src/RemoteClient.Core/Localization/String.en.cs +++ b/src/RemoteClient.Core/Localization/String.en.cs @@ -230,6 +230,11 @@ public static partial class Strings [nameof(DevicesView_Connect)] = "Connect", [nameof(DevicesView_Device)] = "Device", [nameof(DevicesView_LastOnline)] = "Last online", + [nameof(DevicesView_LinkFlaky)] = "flaky", + [nameof(DevicesView_LinkFlakyTip)] = "Flaky link: {0} reconnects in the last hour — the agent is likely alive, just on a poor network.", + [nameof(DeviceTelemetryPanel_LinkQuality)] = "Link", + [nameof(DeviceTelemetryPanel_LinkStable)] = "stable", + [nameof(DeviceTelemetryPanel_LinkFlakyDetail)] = "flaky · {0} reconnects/hour", [nameof(DevicesView_Update)] = "Update", [nameof(DevicesView_UnlockSignIn)] = "Unlock sign-in", [nameof(DevicesView_Approve)] = "Approve", diff --git a/src/RemoteClient.Core/Localization/String.hu.cs b/src/RemoteClient.Core/Localization/String.hu.cs index 59dc44d..38a2863 100644 --- a/src/RemoteClient.Core/Localization/String.hu.cs +++ b/src/RemoteClient.Core/Localization/String.hu.cs @@ -230,6 +230,11 
@@ public static partial class Strings [nameof(DevicesView_Connect)] = "Csatlakozás", [nameof(DevicesView_Device)] = "Gép", [nameof(DevicesView_LastOnline)] = "Utoljára online", + [nameof(DevicesView_LinkFlaky)] = "ingadozó", + [nameof(DevicesView_LinkFlakyTip)] = "Kapcsolat ingadozó: {0} újracsatlakozás az elmúlt órában — az agent valószínűleg él, csak a hálózat rossz.", + [nameof(DeviceTelemetryPanel_LinkQuality)] = "Kapcsolat", + [nameof(DeviceTelemetryPanel_LinkStable)] = "stabil", + [nameof(DeviceTelemetryPanel_LinkFlakyDetail)] = "ingadozó · {0} újracsatlakozás/óra", [nameof(DevicesView_Update)] = "Frissítés", [nameof(DevicesView_UnlockSignIn)] = "Belépés feloldása", [nameof(DevicesView_Approve)] = "Jóváhagyás", diff --git a/src/RemoteClient.Core/Localization/Strings.cs b/src/RemoteClient.Core/Localization/Strings.cs index 4ae48d2..4519984 100644 --- a/src/RemoteClient.Core/Localization/Strings.cs +++ b/src/RemoteClient.Core/Localization/Strings.cs @@ -284,6 +284,11 @@ private static string NormalizeLanguageCode(string? 
langCode) public static string DevicesView_Connect => Get(nameof(DevicesView_Connect)); public static string DevicesView_Device => Get(nameof(DevicesView_Device)); public static string DevicesView_LastOnline => Get(nameof(DevicesView_LastOnline)); + public static string DevicesView_LinkFlaky => Get(nameof(DevicesView_LinkFlaky)); + public static string DevicesView_LinkFlakyTip => Get(nameof(DevicesView_LinkFlakyTip)); + public static string DeviceTelemetryPanel_LinkQuality => Get(nameof(DeviceTelemetryPanel_LinkQuality)); + public static string DeviceTelemetryPanel_LinkStable => Get(nameof(DeviceTelemetryPanel_LinkStable)); + public static string DeviceTelemetryPanel_LinkFlakyDetail => Get(nameof(DeviceTelemetryPanel_LinkFlakyDetail)); public static string DevicesView_Update => Get(nameof(DevicesView_Update)); public static string DevicesView_UnlockSignIn => Get(nameof(DevicesView_UnlockSignIn)); public static string DevicesView_Approve => Get(nameof(DevicesView_Approve)); diff --git a/src/RemoteClient.Linux/RemoteClient.Linux.csproj b/src/RemoteClient.Linux/RemoteClient.Linux.csproj index f49beb1..2b80b18 100644 --- a/src/RemoteClient.Linux/RemoteClient.Linux.csproj +++ b/src/RemoteClient.Linux/RemoteClient.Linux.csproj @@ -12,7 +12,7 @@ false RemoteClient.Linux RemoteClient.Linux - 1.8.0.0 + 1.8.5.0 diff --git a/src/RemoteClient.Lite/RemoteClient.Lite.csproj b/src/RemoteClient.Lite/RemoteClient.Lite.csproj index 20fb244..2c10962 100644 --- a/src/RemoteClient.Lite/RemoteClient.Lite.csproj +++ b/src/RemoteClient.Lite/RemoteClient.Lite.csproj @@ -14,7 +14,7 @@ RemoteClient.Lite RemoteClient.Lite ..\..\icon\app.ico - 1.8.0.0 + 1.8.5.0 win-x64 true true diff --git a/src/RemoteClient/RemoteClient.csproj b/src/RemoteClient/RemoteClient.csproj index 443841d..f4172dd 100644 --- a/src/RemoteClient/RemoteClient.csproj +++ b/src/RemoteClient/RemoteClient.csproj @@ -17,7 +17,7 @@ ..\..\icon\app.ico - 1.8.0.0 + 1.8.5.0 diff --git 
a/src/RemoteClient/Views/DeviceTelemetryPanel.cs b/src/RemoteClient/Views/DeviceTelemetryPanel.cs index 5d58197..8acc764 100644 --- a/src/RemoteClient/Views/DeviceTelemetryPanel.cs +++ b/src/RemoteClient/Views/DeviceTelemetryPanel.cs @@ -42,6 +42,7 @@ void Row(string caption, string? value) Row(L.DevicesView_Device, d.Hostname); Row("Online", d.Online ? "online" : "offline"); + Row(L.DeviceTelemetryPanel_LinkQuality, d.LinkFlaky ? L.Format(L.DeviceTelemetryPanel_LinkFlakyDetail, d.RecentReconnects) : L.DeviceTelemetryPanel_LinkStable); Row(L.DevicesView_LastOnline, d.LastSeenAt?.LocalDateTime.ToString("g")); Row(L.BootstrapView_Status, d.Status); Row(L.DeviceTelemetryPanel_Channel, string.Equals(d.Channel, "beta", StringComparison.OrdinalIgnoreCase) ? "BETA" : "rtm"); diff --git a/src/RemoteClient/Views/DevicesView.cs b/src/RemoteClient/Views/DevicesView.cs index 565b33a..e3b3abe 100644 --- a/src/RemoteClient/Views/DevicesView.cs +++ b/src/RemoteClient/Views/DevicesView.cs @@ -204,8 +204,10 @@ private void RenderList() if (d.LoginLocked) item.ToolTipText = L.Format(L.DevicesView_SignInLockedFailedAttempts, d.LoginFailCount); item.SubItems.Add(d.GroupName ?? "—"); item.SubItems.Add(string.IsNullOrWhiteSpace(d.Note) ? "—" : d.Note); - var online = item.SubItems.Add(d.Online ? "● online" : "○ offline"); - online.ForeColor = d.Online ? Color.MediumSeaGreen : Color.Gray; + // Offline + recent C2 churn = "flaky" (agent likely alive, just a poor network) rather than dead. + var online = item.SubItems.Add(d.Online ? "● online" : d.LinkFlaky ? "◐ " + L.DevicesView_LinkFlaky : "○ offline"); + online.ForeColor = d.Online ? Color.MediumSeaGreen : d.LinkFlaky ? Color.DarkOrange : Color.Gray; + if (d.LinkFlaky && !d.LoginLocked) item.ToolTipText = L.Format(L.DevicesView_LinkFlakyTip, d.RecentReconnects); item.SubItems.Add(string.IsNullOrWhiteSpace(d.LoggedInUser) ? "—" : d.LoggedInUser); item.SubItems.Add(d.LastSeenAt?.LocalDateTime.ToString("g") ?? 
"—"); var pip = item.SubItems.Add(DeviceTelemetryPanel.PublicIp(d)); diff --git a/src/RemoteServer/Hub/AgentConnectionRegistry.cs b/src/RemoteServer/Hub/AgentConnectionRegistry.cs index f8fdd93..1230cee 100644 --- a/src/RemoteServer/Hub/AgentConnectionRegistry.cs +++ b/src/RemoteServer/Hub/AgentConnectionRegistry.cs @@ -14,11 +14,20 @@ public sealed class AgentConnectionRegistry { private readonly ConcurrentDictionary _connections = new(); + /// Per-device rolling C2 (re)connect history, in-memory only. Frequent reconnects = flaky link: + /// the agent is likely alive but on a poor network, as opposed to a genuinely offline/dead device. + private readonly ConcurrentDictionary _reconnects = new(); + private static readonly TimeSpan ReconnectWindowSpan = TimeSpan.FromHours(1); + public IReadOnlyCollection ConnectedDevices => _connections.Keys.ToArray(); public bool IsConnected(string deviceId) => _connections.ContainsKey(deviceId); - public void Register(string deviceId, WebSocket socket) => _connections[deviceId] = socket; + public void Register(string deviceId, WebSocket socket) + { + _connections[deviceId] = socket; + _reconnects.GetOrAdd(deviceId, static _ => new ReconnectWindow()).Mark(); // track C2 churn for the flaky-link signal + } public void Unregister(string deviceId, WebSocket socket) { @@ -37,4 +46,36 @@ public async Task TrySendAsync(string deviceId, AgentCommand cmd, Cancella await socket.SendAsync(payload, WebSocketMessageType.Text, endOfMessage: true, ct); return true; } + + /// How many times this device's C2 connection (re)established within the last hour. 0–1 is a stable + /// link; higher means the agent keeps dropping and reconnecting (flaky network), not a dead device. + public int RecentReconnects(string deviceId) => + _reconnects.TryGetValue(deviceId, out var w) ? w.CountWithin(ReconnectWindowSpan) : 0; + + /// Small thread-safe rolling window of recent connect timestamps for one device. 
+ private sealed class ReconnectWindow + { + private const int Cap = 64; // bound memory for a pathologically flapping device + private readonly object _gate = new(); + private readonly Queue _hits = new(); + + public void Mark() + { + lock (_gate) + { + _hits.Enqueue(DateTimeOffset.UtcNow); + while (_hits.Count > Cap) _hits.Dequeue(); + } + } + + public int CountWithin(TimeSpan window) + { + var cutoff = DateTimeOffset.UtcNow - window; + lock (_gate) + { + while (_hits.Count > 0 && _hits.Peek() < cutoff) _hits.Dequeue(); + return _hits.Count; + } + } + } } diff --git a/src/RemoteServer/Program.cs b/src/RemoteServer/Program.cs index 0d7aad9..83b9cc1 100644 --- a/src/RemoteServer/Program.cs +++ b/src/RemoteServer/Program.cs @@ -620,6 +620,7 @@ await RegisterLoginFailAsync(db, email, ctx, device, opIp, uname, "password-rese Hostname = d.Hostname, Status = d.Status.ToString(), Online = registry.IsConnected(d.DeviceId), + RecentReconnects = registry.RecentReconnects(d.DeviceId), LastSeenAt = d.LastSeenAt, VncSecret = protector.TryUnprotect(d.VncSecret), GroupId = d.GroupId, diff --git a/src/RemoteServer/RemoteServer.csproj b/src/RemoteServer/RemoteServer.csproj index 83dfe63..168c862 100644 --- a/src/RemoteServer/RemoteServer.csproj +++ b/src/RemoteServer/RemoteServer.csproj @@ -26,7 +26,7 @@ net10.0 enable enable - 1.8.0.0 + 1.8.5.0