diff --git a/CHANGELOG.md b/CHANGELOG.md index 7190db1..f0427d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,25 @@ -## 1.2.0 - unreleased +## 1.2.1 + +### Fixed +* Discovery against appliances with a large number of application domains + (200+) where the orchestrator's API user lacks read access to many of them + no longer produces a SQL truncation failure in Keyfactor Command. Two + related fixes: + * `FlowLogger.GetSummary()` output is now capped at 3500 characters with + a `[truncated, N chars omitted; check orchestrator log for full + breadcrumb]` marker. Defense-in-depth against Command's + `AgentJobStatus.Message` column overflow (NVARCHAR(4000)). + * Discovery aggregates per-domain failures by error signature instead of + emitting one `FAIL` and one `SKIP` line per failed domain. Identical + HTTP errors across N domains collapse into one summary line with a + count and a 5-domain sample. Successful domains still emit one + `Discovered-` step each. + + Net effect on a 235-domain appliance with 114 inaccessible domains: the + breadcrumb summary drops from ~50 KB and 235+ step lines to ~1 KB and + ~10 step lines, well under Command's column cap. + +## 1.2.0 ### Added * Discovery job: automatically enumerates all application domains on a DataPower diff --git a/DataPower/FlowLogger.cs b/DataPower/FlowLogger.cs index 917f45c..8d67677 100644 --- a/DataPower/FlowLogger.cs +++ b/DataPower/FlowLogger.cs @@ -172,6 +172,12 @@ public void EndBranch() public bool HasFailures => _steps.Any(s => s.Status == StepStatus.Failed); + // Command's AgentJobStatus.Message column has a hard length cap (typically + // NVARCHAR(4000)). Truncate aggressively so a job result update never fails + // with "String or binary data would be truncated". Leaves headroom for the + // FailureMessage prefix the caller adds. + private const int MaxSummaryChars = 3500; + public string GetSummary() { var hasFailures = HasFailures; @@ -202,7 +208,14 @@ public string GetSummary() } sb.Append("----------------------------------------"); - return sb.ToString(); + var summary = sb.ToString(); + if (summary.Length > MaxSummaryChars) + { + var omitted = summary.Length - MaxSummaryChars; + summary = summary.Substring(0, MaxSummaryChars) + + $"\n... [truncated, {omitted} chars omitted; check orchestrator log for full breadcrumb]"; + } + return summary; } public void Dispose() diff --git a/DataPower/Jobs/Discovery.cs b/DataPower/Jobs/Discovery.cs index f7750c9..71dd91b 100644 --- a/DataPower/Jobs/Discovery.cs +++ b/DataPower/Jobs/Discovery.cs @@ -15,6 +15,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.RegularExpressions; using Keyfactor.Extensions.Orchestrator.DataPower.Client; using Keyfactor.Logging; using Keyfactor.Orchestrators.Common.Enums; @@ -53,6 +54,29 @@ public class Discovery : JobBase, IDiscoveryJobExtension // exact casing has shifted across Command versions. private static readonly string[] DirsToSearchKeys = { "dirs", "Dirs", "directories", "Directories", "DirsToSearch" }; + // Extracts a stable group key for an exception thrown by a per-domain + // ListFileStoreDirectories call. Strips domain-specific bits (the /_links/self/href + // URL changes per domain) so identical RBAC failures across hundreds of domains + // collapse into a single error group. + private static readonly Regex ErrorMessageRegex = new Regex( + "\"error\"\\s*:\\s*\\[\\s*\"([^\"]+)\"", + RegexOptions.Compiled | RegexOptions.Singleline); + + private static string ErrorSignatureOf(Exception ex) + { + var apiEx = DataPowerApiException.Find(ex); + if (apiEx != null) + { + var m = ErrorMessageRegex.Match(apiEx.ResponseBody ?? string.Empty); + if (m.Success) + return $"HTTP {(int)apiEx.StatusCode} {apiEx.StatusCode}: {m.Groups[1].Value}"; + return $"HTTP {(int)apiEx.StatusCode} {apiEx.StatusCode}"; + } + + var msg = ex?.Message ?? "(no message)"; + return msg.Length > 80 ? msg.Substring(0, 80) + "..." : msg; + } + private static (HashSet Dirs, string Source) ResolveDirsToSearch(DiscoveryJobConfiguration config) { if (config?.JobProperties != null) @@ -175,53 +199,81 @@ private JobResult PerformDiscovery(DiscoveryJobConfiguration config, SubmitDisco flow.Branch($"PerDomain (count={domainCount})"); try { + var listedOk = 0; + var emptyNameSkipped = 0; + + // Group per-domain failures by error signature instead of emitting a + // FAIL+SKIP line per failed domain. On appliances with 200+ domains and + // a non-trivial RBAC story, that easily produces a 50 KB summary which + // overflows Command's AgentJobStatus.Message column. One aggregated SKIP + // line per error class scales fine and is far more readable. + var errorGroups = new Dictionary>(StringComparer.Ordinal); + foreach (var domain in domains) { if (string.IsNullOrWhiteSpace(domain?.Name)) { - flow.Skip("Domain", "empty domain name"); + emptyNameSkipped++; continue; } + List directories; try { - List directories = null; - flow.Step($"ListFileStore-{domain.Name}", () => - { - directories = apiClient.ListFileStoreDirectories(domain.Name); - }); - - // DataPower's filestore location names carry a trailing colon - // (e.g. "cert:" / "pubcert:" / "sharedcert:"). Strip it before - // matching and before composing the store path. - var certDirectories = directories - .Select(d => d?.TrimEnd(':')) - .Where(d => !string.IsNullOrEmpty(d) && certStoreDirectories.Contains(d)) - .ToList(); - - var isDefault = string.Equals(domain.Name, DefaultDomainName, StringComparison.OrdinalIgnoreCase); - foreach (var dir in certDirectories) - { - if (ApplianceWideDirectories.Contains(dir) && !isDefault) - { - flow.Skip($"{domain.Name}\\{dir}", "appliance-wide; emitted only under default"); - continue; - } - - var storePath = $"{domain.Name}\\{dir}"; - discoveredLocations.Add(storePath); - flow.Step($"Discovered-{storePath}"); - } + directories = apiClient.ListFileStoreDirectories(domain.Name); } catch (Exception ex) { // Resilient by design: one inaccessible domain should not abort discovery - var inner = DescribeException(ex); - flow.Skip($"Domain-{domain.Name}", $"unable to list directories: {inner}"); - Logger.LogWarning(ex, "Unable to list filestore directories for domain {DomainName}: {ErrorMessage}", - domain.Name, inner); + var signature = ErrorSignatureOf(ex); + if (!errorGroups.TryGetValue(signature, out var list)) + { + list = new List(); + errorGroups[signature] = list; + } + list.Add(domain.Name); + Logger.LogWarning(ex, + "Unable to list filestore directories for domain {DomainName}: {ErrorMessage}", + domain.Name, DescribeException(ex)); + continue; + } + + listedOk++; + + // DataPower's filestore location names carry a trailing colon + // (e.g. "cert:" / "pubcert:" / "sharedcert:"). Strip it before + // matching and before composing the store path. + var certDirectories = directories + .Select(d => d?.TrimEnd(':')) + .Where(d => !string.IsNullOrEmpty(d) && certStoreDirectories.Contains(d)) + .ToList(); + + var isDefault = string.Equals(domain.Name, DefaultDomainName, StringComparison.OrdinalIgnoreCase); + foreach (var dir in certDirectories) + { + if (ApplianceWideDirectories.Contains(dir) && !isDefault) + continue; // appliance-wide; emitted only under default + + var storePath = $"{domain.Name}\\{dir}"; + discoveredLocations.Add(storePath); + flow.Step($"Discovered-{storePath}"); } } + + var listedDetail = $"listed={listedOk}/{domainCount}"; + if (emptyNameSkipped > 0) + listedDetail += $", emptyName={emptyNameSkipped}"; + if (errorGroups.Count > 0) + listedDetail += $", failed={errorGroups.Values.Sum(v => v.Count)}"; + flow.Step("ListFileStore", listedDetail); + + foreach (var kvp in errorGroups.OrderByDescending(g => g.Value.Count)) + { + var sample = kvp.Value.Take(5).ToList(); + var more = kvp.Value.Count - sample.Count; + var sampleStr = string.Join(", ", sample) + (more > 0 ? $" (+{more} more)" : ""); + flow.Skip($"DomainsFailed[{kvp.Key}]", $"{kvp.Value.Count} domain(s): {sampleStr}"); + } } finally {