From 7c70d3aa6584a4f094fced8851b40bd838b634b3 Mon Sep 17 00:00:00 2001 From: Bec Callow Date: Thu, 18 Jun 2026 10:01:09 +1000 Subject: [PATCH 1/4] Handle pending helm updates and installs --- .../HelmUpgradeWithKOSConvention.cs | 69 ++++++++++++++++++- .../Kubernetes/Integration/HelmCli.cs | 48 ++++++++++++- 2 files changed, 113 insertions(+), 4 deletions(-) diff --git a/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs b/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs index 7ff9ee13ad..106d224a12 100644 --- a/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs +++ b/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs @@ -53,14 +53,22 @@ public void Install(RunningDeployment deployment) kubectl.SetKubectl(); + // GetCurrentRevision returns null when the release doesn't exist yet; in that case + // there's nothing to recover from, so we skip the status check entirely. var currentRevisionNumber = helmCli.GetCurrentRevision(releaseName); + if (currentRevisionNumber != null && CheckAndHandleStuckRelease(helmCli, releaseName)) + { + // Re-read revision after recovery so newRevisionNumber reflects the post-rollback state. + // Skipped on the happy path (no recovery ran) since the revision cannot have changed. + currentRevisionNumber = helmCli.GetCurrentRevision(releaseName); + } var newRevisionNumber = (currentRevisionNumber ?? 0) + 1; //This is used to cancel KOS when the helm upgrade has completed //It does not cancel the get manifest var helmInstallCompletedCts = new CancellationTokenSource(); - + //This is used to cancel the get manifest when the helm install fails (and we are still trying to retrieve the manifest) var helmInstallErrorCts = new CancellationTokenSource(); @@ -71,7 +79,7 @@ public void Install(RunningDeployment deployment) valueSourcesParser, helmCli, namespaceResolver); - + executor.ExecuteHelmUpgrade(deployment, releaseName, newRevisionNumber, helmInstallCompletedCts, helmInstallErrorCts); }); @@ -105,5 +113,62 @@ string GetReleaseName(IVariables variables) log.Info($"Using Release Name {releaseName}"); return releaseName; } + + // Returns true if a recovery action was attempted (indicating the revision number may have changed). + bool CheckAndHandleStuckRelease(HelmCli helmCli, string releaseName) + { + var status = helmCli.GetReleaseStatus(releaseName); + + if (status == null) + return false; + + log.Info($"Release {releaseName} current status: {status}"); + + // Handle problematic states that could be left from cancelled deployments + switch (status.ToLowerInvariant()) + { + case "pending-install": + // No prior successful revision exists, so rollback is not possible. Uninstall the + // stuck release so the next upgrade --install can start cleanly. + log.Warn($"Release {releaseName} is stuck in {status} state, likely from a cancelled first install. Uninstalling to recover..."); + try + { + var uninstallResult = helmCli.Uninstall(releaseName); + if (uninstallResult.ExitCode == 0) + log.Info($"Successfully uninstalled stuck release {releaseName}"); + else + log.Warn($"Uninstall had non-zero exit code but continuing: {uninstallResult.ExitCode}"); + } + catch (Exception ex) + { + log.Warn($"Failed to uninstall release {releaseName}: {ex.Message}. Continuing with deployment..."); + } + return true; + + case "pending-upgrade": + log.Warn($"Release {releaseName} is stuck in {status} state, likely from a cancelled deployment. Rolling back to recover..."); + try + { + var rollbackResult = helmCli.Rollback(releaseName); + if (rollbackResult.ExitCode == 0) + log.Info($"Successfully rolled back release {releaseName}"); + else + log.Warn($"Rollback had non-zero exit code but continuing: {rollbackResult.ExitCode}"); + } + catch (Exception ex) + { + log.Warn($"Failed to rollback release {releaseName}: {ex.Message}. Continuing with deployment..."); + } + return true; + + case "failed": + log.Info($"Release {releaseName} is in failed state. Helm upgrade --install should handle this automatically."); + return false; + + default: + log.Verbose($"Release {releaseName} status: {status} - proceeding with deployment"); + return false; + } + } } } \ No newline at end of file diff --git a/source/Calamari/Kubernetes/Integration/HelmCli.cs b/source/Calamari/Kubernetes/Integration/HelmCli.cs index ed66704f48..fccb786b22 100644 --- a/source/Calamari/Kubernetes/Integration/HelmCli.cs +++ b/source/Calamari/Kubernetes/Integration/HelmCli.cs @@ -107,6 +107,51 @@ public SemanticVersion GetParsedExecutableVersion() return metadata.revision; } + public string? GetReleaseStatus(string releaseName) + { + var result = ExecuteCommandAndReturnOutput("status", releaseName, "-o json", NamespaceArg()); + + if (result.Result.ExitCode != 0) + { + // Log any error output so auth/RBAC/network failures are visible rather than silently returning null + var errorOutput = result.Output.MergeInfoLogs(); + if (!string.IsNullOrWhiteSpace(errorOutput)) + log.Verbose($"helm status returned exit code {result.Result.ExitCode}: {errorOutput}"); + return null; + } + + var json = result.Output.MergeInfoLogs(); + var status = JsonConvert.DeserializeAnonymousType(json, + new + { + info = new + { + status = "" + } + }); + + return status?.info?.status; + } + + public CommandResult Rollback(string releaseName, int? revision = null) + { + var args = new List { "rollback", releaseName }; + + if (revision.HasValue) + args.Add(revision.Value.ToString()); + + args.Add(NamespaceArg()); + + var result = ExecuteCommandAndLogOutput(args); + return result; + } + + public CommandResult Uninstall(string releaseName) + { + var args = new List { "uninstall", releaseName, NamespaceArg() }; + return ExecuteCommandAndLogOutput(args); + } + public string GetManifest(string releaseName, int revisionNumber) { var result = ExecuteCommandAndReturnOutput("get", "manifest", releaseName, $"--revision {revisionNumber}", NamespaceArg()); @@ -125,8 +170,7 @@ public CommandResult Upgrade(string releaseName, string packagePath, IEnumerable buildArgs.AddRange(upgradeArgs); buildArgs.Add(NamespaceArg()); - //properly quote the release name and package path (consistent with previous code) - buildArgs.Add($"\"{releaseName}\""); + buildArgs.Add(releaseName); buildArgs.Add($"\"{packagePath}\""); var result = ExecuteCommandAndLogOutput(buildArgs); From 124217c2bcc9a2a436b080add59198c76d487dcd Mon Sep 17 00:00:00 2001 From: Bec Callow Date: Thu, 18 Jun 2026 10:02:31 +1000 Subject: [PATCH 2/4] Improve azure error logging --- .../AzureResourceGroupOperator.cs | 148 ++++++++++++++++-- .../Bicep/DeployBicepTemplateBehaviour.cs | 2 +- .../DeployAzureResourceGroupBehaviour.cs | 2 +- 3 files changed, 141 insertions(+), 11 deletions(-) diff --git a/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs b/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs index 5504cd8685..f6bf3e8542 100644 --- a/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs +++ b/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs @@ -1,4 +1,6 @@ using System; +using System.Collections.Generic; +using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -9,7 +11,6 @@ using Azure.ResourceManager.Resources.Models; using Calamari.Common.Plumbing.Logging; using Calamari.Common.Plumbing.Variables; -using Newtonsoft.Json; using Newtonsoft.Json.Linq; using Polly; using Polly.Timeout; @@ -48,19 +49,27 @@ public async Task> CreateDeployment(Resource } } - public async Task PollForCompletionWithTimeout(ArmOperation deploymentOperation, IVariables variables) + public async Task PollForCompletionWithTimeout(ArmOperation deploymentOperation, + ResourceGroupResource resourceGroupResource, + string deploymentName, + IVariables variables) { var pollingTimeout = GetPollingTimeout(variables); var asyncResourceGroupPollingTimeoutPolicy = Policy.TimeoutAsync(pollingTimeout, TimeoutStrategy.Optimistic); - await asyncResourceGroupPollingTimeoutPolicy.ExecuteAsync(ct => Poll(deploymentOperation, ct), CancellationToken.None); + await asyncResourceGroupPollingTimeoutPolicy.ExecuteAsync(ct => Poll(deploymentOperation, resourceGroupResource, deploymentName, ct), CancellationToken.None); } - public async Task PollForCompletion(ArmOperation deploymentOperation) + public async Task PollForCompletion(ArmOperation deploymentOperation, + ResourceGroupResource resourceGroupResource, + string deploymentName) { - await Poll(deploymentOperation, CancellationToken.None); + await Poll(deploymentOperation, resourceGroupResource, deploymentName, CancellationToken.None); } - async Task Poll(ArmOperation deploymentOperation, CancellationToken cancellationToken) + async Task Poll(ArmOperation deploymentOperation, + ResourceGroupResource resourceGroupResource, + string deploymentName, + CancellationToken cancellationToken) { log.Info("Polling for deployment completion..."); try @@ -69,9 +78,15 @@ async Task Poll(ArmOperation deploymentOperation, Cancell var response = await deploymentOperation.WaitForCompletionAsync(delayStrategy, cancellationToken); log.Info($"Deployment completed with status: {response.Value?.Data.Properties?.ProvisioningState}"); } - catch + catch (RequestFailedException ex) + { + var enhancedMessage = await TryEnhanceDeploymentError(resourceGroupResource, deploymentName, ex); + log.Error(enhancedMessage); + throw; + } + catch (Exception ex) { - log.Error("Error polling for deployment completion"); + log.Error($"Error polling for deployment completion: {ex.Message}"); throw; } } @@ -98,13 +113,128 @@ async Task LogOperationResults(ArmOperation operation) sb.AppendLine($"Status: {properties.StatusCode}"); sb.AppendLine($"Provisioning State: {properties.ProvisioningState}"); if (properties.StatusMessage != null) - sb.AppendLine($"Status Message: {JsonConvert.SerializeObject(properties.StatusMessage)}"); + sb.AppendLine($"Status Message: {FormatStatusMessage(properties.StatusMessage)}"); sb.Append(" \n"); } log.Info(sb.ToString()); } + async Task TryEnhanceDeploymentError(ResourceGroupResource resourceGroupResource, + string deploymentName, + RequestFailedException originalException) + { + try + { + log.Verbose($"Attempting to retrieve detailed operation information for failed deployment '{deploymentName}'..."); + + ArmDeploymentResource? deploymentResource = null; + try + { + var deploymentResponse = await resourceGroupResource.GetArmDeploymentAsync(deploymentName); + if (deploymentResponse.HasValue) + deploymentResource = deploymentResponse.Value; + } + catch (Exception ex) + { + log.Verbose($"Could not retrieve deployment resource for error detail: {ex.Message}"); + } + + if (deploymentResource == null) + return $"Error polling for deployment completion: {originalException.Message}"; + + var operations = new List(); + var failureCount = 0; + var totalOperations = 0; + + await foreach (var op in deploymentResource.GetDeploymentOperationsAsync()) + { + totalOperations++; + var properties = op.Properties; + + if (properties?.ProvisioningState == "Failed") + { + failureCount++; + var resourceName = properties.TargetResource?.ResourceName ?? "Unknown Resource"; + var resourceType = properties.TargetResource?.ResourceType ?? "Unknown Type"; + + var failureDetail = $"\n [FAILED] {resourceType} '{resourceName}'"; + + if (properties.StatusMessage != null) + { + var errorInfo = ExtractAzureErrorInfo(properties.StatusMessage); + if (!string.IsNullOrWhiteSpace(errorInfo)) + failureDetail += $"\n Error: {errorInfo}"; + } + + if (properties.Timestamp.HasValue) + failureDetail += $"\n Failed at: {properties.Timestamp.Value.ToLocalTime():yyyy-MM-dd HH:mm:ss}"; + + operations.Add(failureDetail); + } + } + + log.Verbose($"Found {totalOperations} total operations, {failureCount} failed"); + + if (operations.Any()) + { + return $"Error polling for deployment completion: {originalException.Message}\n\n" + + $"FAILED AZURE RESOURCES ({failureCount} of {totalOperations} operations failed):" + + string.Join("", operations) + + "\n\nFor full details check Azure Portal > Resource Groups > Deployments, " + + "or see https://aka.ms/arm-deployment-operations for troubleshooting guidance."; + } + + if (totalOperations > 0) + { + return $"Error polling for deployment completion: {originalException.Message}\n\n" + + $"Found {totalOperations} deployment operations but none were marked as failed. " + + "Check the Azure Portal for detailed deployment status."; + } + + return $"Error polling for deployment completion: {originalException.Message}"; + } + catch (Exception enhancementEx) + { + log.Verbose($"Failed to retrieve detailed deployment error information: {enhancementEx.Message}"); + return $"Error polling for deployment completion: {originalException.Message}"; + } + } + + static string ExtractAzureErrorInfo(StatusMessage statusMessage) + { + var error = statusMessage.Error; + if (error == null) + return string.Empty; + + if (!string.IsNullOrWhiteSpace(error.Code) && !string.IsNullOrWhiteSpace(error.Message)) + return $"[{error.Code}] {error.Message}"; + if (!string.IsNullOrWhiteSpace(error.Message)) + return error.Message; + if (!string.IsNullOrWhiteSpace(error.Code)) + return error.Code; + + return string.Empty; + } + + static string FormatStatusMessage(StatusMessage statusMessage) + { + var errorInfo = ExtractAzureErrorInfo(statusMessage); + if (!string.IsNullOrWhiteSpace(errorInfo)) + return errorInfo; + + // Fall back to JSON for status messages without a typed error (e.g. success responses) + try + { + var json = JObject.FromObject(statusMessage); + return json.ToString(); + } + catch + { + return statusMessage.ToString() ?? string.Empty; + } + } + void CaptureOutputs(string? outputsJson, IVariables variables) { if (string.IsNullOrWhiteSpace(outputsJson)) diff --git a/source/Calamari.AzureResourceGroup/Bicep/DeployBicepTemplateBehaviour.cs b/source/Calamari.AzureResourceGroup/Bicep/DeployBicepTemplateBehaviour.cs index e5c4da7f7e..2be3e5ceab 100644 --- a/source/Calamari.AzureResourceGroup/Bicep/DeployBicepTemplateBehaviour.cs +++ b/source/Calamari.AzureResourceGroup/Bicep/DeployBicepTemplateBehaviour.cs @@ -50,7 +50,7 @@ public async Task Execute(RunningDeployment context) deploymentMode, template, parameters); - await resourceGroupOperator.PollForCompletion(deploymentOperation); + await resourceGroupOperator.PollForCompletion(deploymentOperation, resourceGroup, armDeploymentName); await resourceGroupOperator.FinalizeDeployment(deploymentOperation, context.Variables); } diff --git a/source/Calamari.AzureResourceGroup/DeployAzureResourceGroupBehaviour.cs b/source/Calamari.AzureResourceGroup/DeployAzureResourceGroupBehaviour.cs index 21a2423b9f..be0f6d0b5f 100644 --- a/source/Calamari.AzureResourceGroup/DeployAzureResourceGroupBehaviour.cs +++ b/source/Calamari.AzureResourceGroup/DeployAzureResourceGroupBehaviour.cs @@ -71,7 +71,7 @@ public async Task Execute(RunningDeployment context) deploymentMode, template, parameters); - await azureResourceGroupOperator.PollForCompletionWithTimeout(deploymentOperation, variables); + await azureResourceGroupOperator.PollForCompletionWithTimeout(deploymentOperation, resourceGroupResource, deploymentName, variables); await azureResourceGroupOperator.FinalizeDeployment(deploymentOperation, variables); } } \ No newline at end of file From b8197e40b21d68ba5901cdd660060378f1549bb3 Mon Sep 17 00:00:00 2001 From: Bec Callow Date: Mon, 29 Jun 2026 14:38:58 +1000 Subject: [PATCH 3/4] Revert helm KOS stuck-release recovery (included by mistake) This change belongs in a separate PR. Reverts commit 7c70d3aa6. Co-Authored-By: Claude Sonnet 4.6 --- .../HelmUpgradeWithKOSConvention.cs | 69 +------------------ .../Kubernetes/Integration/HelmCli.cs | 48 +------------ 2 files changed, 4 insertions(+), 113 deletions(-) diff --git a/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs b/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs index 106d224a12..7ff9ee13ad 100644 --- a/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs +++ b/source/Calamari/Kubernetes/Conventions/HelmUpgradeWithKOSConvention.cs @@ -53,22 +53,14 @@ public void Install(RunningDeployment deployment) kubectl.SetKubectl(); - // GetCurrentRevision returns null when the release doesn't exist yet; in that case - // there's nothing to recover from, so we skip the status check entirely. var currentRevisionNumber = helmCli.GetCurrentRevision(releaseName); - if (currentRevisionNumber != null && CheckAndHandleStuckRelease(helmCli, releaseName)) - { - // Re-read revision after recovery so newRevisionNumber reflects the post-rollback state. - // Skipped on the happy path (no recovery ran) since the revision cannot have changed. - currentRevisionNumber = helmCli.GetCurrentRevision(releaseName); - } var newRevisionNumber = (currentRevisionNumber ?? 0) + 1; //This is used to cancel KOS when the helm upgrade has completed //It does not cancel the get manifest var helmInstallCompletedCts = new CancellationTokenSource(); - + //This is used to cancel the get manifest when the helm install fails (and we are still trying to retrieve the manifest) var helmInstallErrorCts = new CancellationTokenSource(); @@ -79,7 +71,7 @@ public void Install(RunningDeployment deployment) valueSourcesParser, helmCli, namespaceResolver); - + executor.ExecuteHelmUpgrade(deployment, releaseName, newRevisionNumber, helmInstallCompletedCts, helmInstallErrorCts); }); @@ -113,62 +105,5 @@ string GetReleaseName(IVariables variables) log.Info($"Using Release Name {releaseName}"); return releaseName; } - - // Returns true if a recovery action was attempted (indicating the revision number may have changed). - bool CheckAndHandleStuckRelease(HelmCli helmCli, string releaseName) - { - var status = helmCli.GetReleaseStatus(releaseName); - - if (status == null) - return false; - - log.Info($"Release {releaseName} current status: {status}"); - - // Handle problematic states that could be left from cancelled deployments - switch (status.ToLowerInvariant()) - { - case "pending-install": - // No prior successful revision exists, so rollback is not possible. Uninstall the - // stuck release so the next upgrade --install can start cleanly. - log.Warn($"Release {releaseName} is stuck in {status} state, likely from a cancelled first install. Uninstalling to recover..."); - try - { - var uninstallResult = helmCli.Uninstall(releaseName); - if (uninstallResult.ExitCode == 0) - log.Info($"Successfully uninstalled stuck release {releaseName}"); - else - log.Warn($"Uninstall had non-zero exit code but continuing: {uninstallResult.ExitCode}"); - } - catch (Exception ex) - { - log.Warn($"Failed to uninstall release {releaseName}: {ex.Message}. Continuing with deployment..."); - } - return true; - - case "pending-upgrade": - log.Warn($"Release {releaseName} is stuck in {status} state, likely from a cancelled deployment. Rolling back to recover..."); - try - { - var rollbackResult = helmCli.Rollback(releaseName); - if (rollbackResult.ExitCode == 0) - log.Info($"Successfully rolled back release {releaseName}"); - else - log.Warn($"Rollback had non-zero exit code but continuing: {rollbackResult.ExitCode}"); - } - catch (Exception ex) - { - log.Warn($"Failed to rollback release {releaseName}: {ex.Message}. Continuing with deployment..."); - } - return true; - - case "failed": - log.Info($"Release {releaseName} is in failed state. Helm upgrade --install should handle this automatically."); - return false; - - default: - log.Verbose($"Release {releaseName} status: {status} - proceeding with deployment"); - return false; - } - } } } \ No newline at end of file diff --git a/source/Calamari/Kubernetes/Integration/HelmCli.cs b/source/Calamari/Kubernetes/Integration/HelmCli.cs index fccb786b22..ed66704f48 100644 --- a/source/Calamari/Kubernetes/Integration/HelmCli.cs +++ b/source/Calamari/Kubernetes/Integration/HelmCli.cs @@ -107,51 +107,6 @@ public SemanticVersion GetParsedExecutableVersion() return metadata.revision; } - public string? GetReleaseStatus(string releaseName) - { - var result = ExecuteCommandAndReturnOutput("status", releaseName, "-o json", NamespaceArg()); - - if (result.Result.ExitCode != 0) - { - // Log any error output so auth/RBAC/network failures are visible rather than silently returning null - var errorOutput = result.Output.MergeInfoLogs(); - if (!string.IsNullOrWhiteSpace(errorOutput)) - log.Verbose($"helm status returned exit code {result.Result.ExitCode}: {errorOutput}"); - return null; - } - - var json = result.Output.MergeInfoLogs(); - var status = JsonConvert.DeserializeAnonymousType(json, - new - { - info = new - { - status = "" - } - }); - - return status?.info?.status; - } - - public CommandResult Rollback(string releaseName, int? revision = null) - { - var args = new List { "rollback", releaseName }; - - if (revision.HasValue) - args.Add(revision.Value.ToString()); - - args.Add(NamespaceArg()); - - var result = ExecuteCommandAndLogOutput(args); - return result; - } - - public CommandResult Uninstall(string releaseName) - { - var args = new List { "uninstall", releaseName, NamespaceArg() }; - return ExecuteCommandAndLogOutput(args); - } - public string GetManifest(string releaseName, int revisionNumber) { var result = ExecuteCommandAndReturnOutput("get", "manifest", releaseName, $"--revision {revisionNumber}", NamespaceArg()); @@ -170,7 +125,8 @@ public CommandResult Upgrade(string releaseName, string packagePath, IEnumerable buildArgs.AddRange(upgradeArgs); buildArgs.Add(NamespaceArg()); - buildArgs.Add(releaseName); + //properly quote the release name and package path (consistent with previous code) + buildArgs.Add($"\"{releaseName}\""); buildArgs.Add($"\"{packagePath}\""); var result = ExecuteCommandAndLogOutput(buildArgs); From 772494a7ebf559402b391c05a7f4167981666a34 Mon Sep 17 00:00:00 2001 From: Bec Callow Date: Mon, 29 Jun 2026 15:31:04 +1000 Subject: [PATCH 4/4] Clean up TryEnhanceDeploymentError and FormatStatusMessage - Extract repeated base error message string into a local variable - Rename operations list to failedOperations for clarity - Remove redundant failureCount variable (operations.Count is equivalent) - Remove JObject.FromObject fallback in FormatStatusMessage which silently dropped ARM JSON fields not mapped to StatusMessage's public properties Co-Authored-By: Claude Sonnet 4.6 --- .../AzureResourceGroupOperator.cs | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs b/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs index f6bf3e8542..fab8c85bc4 100644 --- a/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs +++ b/source/Calamari.AzureResourceGroup/AzureResourceGroupOperator.cs @@ -124,6 +124,7 @@ async Task TryEnhanceDeploymentError(ResourceGroupResource resourceGroup string deploymentName, RequestFailedException originalException) { + var baseMessage = $"Error polling for deployment completion: {originalException.Message}"; try { log.Verbose($"Attempting to retrieve detailed operation information for failed deployment '{deploymentName}'..."); @@ -141,10 +142,9 @@ async Task TryEnhanceDeploymentError(ResourceGroupResource resourceGroup } if (deploymentResource == null) - return $"Error polling for deployment completion: {originalException.Message}"; + return baseMessage; - var operations = new List(); - var failureCount = 0; + var failedOperations = new List(); var totalOperations = 0; await foreach (var op in deploymentResource.GetDeploymentOperationsAsync()) @@ -154,7 +154,6 @@ async Task TryEnhanceDeploymentError(ResourceGroupResource resourceGroup if (properties?.ProvisioningState == "Failed") { - failureCount++; var resourceName = properties.TargetResource?.ResourceName ?? "Unknown Resource"; var resourceType = properties.TargetResource?.ResourceType ?? "Unknown Type"; @@ -170,34 +169,34 @@ async Task TryEnhanceDeploymentError(ResourceGroupResource resourceGroup if (properties.Timestamp.HasValue) failureDetail += $"\n Failed at: {properties.Timestamp.Value.ToLocalTime():yyyy-MM-dd HH:mm:ss}"; - operations.Add(failureDetail); + failedOperations.Add(failureDetail); } } - log.Verbose($"Found {totalOperations} total operations, {failureCount} failed"); + log.Verbose($"Found {totalOperations} total operations, {failedOperations.Count} failed"); - if (operations.Any()) + if (failedOperations.Any()) { - return $"Error polling for deployment completion: {originalException.Message}\n\n" + - $"FAILED AZURE RESOURCES ({failureCount} of {totalOperations} operations failed):" + - string.Join("", operations) + + return $"{baseMessage}\n\n" + + $"FAILED AZURE RESOURCES ({failedOperations.Count} of {totalOperations} operations failed):" + + string.Join("", failedOperations) + "\n\nFor full details check Azure Portal > Resource Groups > Deployments, " + "or see https://aka.ms/arm-deployment-operations for troubleshooting guidance."; } if (totalOperations > 0) { - return $"Error polling for deployment completion: {originalException.Message}\n\n" + + return $"{baseMessage}\n\n" + $"Found {totalOperations} deployment operations but none were marked as failed. " + "Check the Azure Portal for detailed deployment status."; } - return $"Error polling for deployment completion: {originalException.Message}"; + return baseMessage; } catch (Exception enhancementEx) { log.Verbose($"Failed to retrieve detailed deployment error information: {enhancementEx.Message}"); - return $"Error polling for deployment completion: {originalException.Message}"; + return baseMessage; } } @@ -223,16 +222,7 @@ static string FormatStatusMessage(StatusMessage statusMessage) if (!string.IsNullOrWhiteSpace(errorInfo)) return errorInfo; - // Fall back to JSON for status messages without a typed error (e.g. success responses) - try - { - var json = JObject.FromObject(statusMessage); - return json.ToString(); - } - catch - { - return statusMessage.ToString() ?? string.Empty; - } + return statusMessage.ToString() ?? string.Empty; } void CaptureOutputs(string? outputsJson, IVariables variables)