From d3664c4d9bd86207c7225b38d0b8d5edd38e67e5 Mon Sep 17 00:00:00 2001 From: Tony Mocanu <64985430+anmocanu@users.noreply.github.com> Date: Thu, 12 Feb 2026 16:22:39 +0200 Subject: [PATCH 1/2] Refactor chkdsk script for better logging and error handling TOOLING 59932 Refactored the script to improve logging and error handling. Ensured that the return statement is outside the try/catch/finally blocks. https://dev.azure.com/Azure-VM-POD/Verticals/_workitems/edit/59932 --- src/windows/win-chkdsk-fs-corruption.ps1 | 73 +++++++++++++++++++----- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/src/windows/win-chkdsk-fs-corruption.ps1 b/src/windows/win-chkdsk-fs-corruption.ps1 index a225a10..e58507e 100644 --- a/src/windows/win-chkdsk-fs-corruption.ps1 +++ b/src/windows/win-chkdsk-fs-corruption.ps1 @@ -8,24 +8,71 @@ # is running with one of the following messages: 1. Scanning and repairing drive (C:) , 2. Checking file system on C: . # If an NTFS error is found in the file system, the dirty bit will set and the disk check application will run to try and fix any corruption. Running it from a rescue VM helps prevent interruptions. +# 1. Initialize script and helper functions . .\src\windows\common\setup\init.ps1 . .\src\windows\common\helpers\Get-Disk-Partitions.ps1 -$partitionlist = Get-Disk-Partitions +# 2. Set Log Path to Public Desktop +$logDir = "C:\Users\Public\Desktop" +$logFile = "$logDir\chkdsk-repair-log.txt" -forEach ( $partition in $partitionlist ) -{ - $driveLetter = ($partition.DriveLetter + ":") - $dirtyFlag = fsutil dirty query $driveLetter - If ($dirtyFlag -notmatch "NOT Dirty") - { - Log-Info "02 - $driveLetter dirty bit set -> running chkdsk" - chkdsk $driveLetter /f +if (-not (Test-Path $logDir)) { + $null = New-Item -ItemType Directory -Path $logDir -Force +} + +# Initialize the status variable early +$script_final_status = $STATUS_SUCCESS + +try { + Log-Info "Script execution started. 
Report: $logFile" | Tee-Object -FilePath $logFile -Append + + # Wrap in @() to prevent the 'op_Addition' error in Get-Disk-Partitions + $partitionlist = @(Get-Disk-Partitions) + + if ($null -eq $partitionlist -or $partitionlist.Count -eq 0) { + Log-Warning "No partitions found to check." | Tee-Object -FilePath $logFile -Append } - else - { - Log-Info "02 - $driveLetter dirty bit not set -> skipping chkdsk" + else { + foreach ($partition in $partitionlist) { + if ($partition -and $partition.DriveLetter) { + + $letter = $partition.DriveLetter + if ($letter -notmatch ":") { $letter = "$letter" + ":" } + + Log-Info "Checking drive: $letter" | Tee-Object -FilePath $logFile -Append + + $dirtyFlag = fsutil dirty query $letter + Log-Output "FSUTIL Output: $dirtyFlag" | Tee-Object -FilePath $logFile -Append + + if ($dirtyFlag -notmatch "NOT Dirty") { + Log-Warning "02 - $letter dirty bit set -> running chkdsk /f" | Tee-Object -FilePath $logFile -Append + + $chkdskResults = chkdsk $letter /f 2>&1 | Where-Object { + $str = $_.ToString() + $str -notmatch "Progress:" -and $str -notmatch "Stage:" -and $str -notmatch "Total:" + } + + foreach ($line in $chkdskResults) { + if ($line) { + Log-Output $line | Tee-Object -FilePath $logFile -Append + } + } + } + else { + Log-Info "02 - $letter dirty bit not set -> skipping" | Tee-Object -FilePath $logFile -Append + } + } + } } + Log-Info "All partitions processed successfully." 
| Tee-Object -FilePath $logFile -Append +} +catch { + Log-Error "An error occurred: $($_.Exception.Message)" | Tee-Object -FilePath $logFile -Append + $script_final_status = $STATUS_ERROR +} +finally { + Log-Info "Script ended at $(Get-Date)" | Tee-Object -FilePath $logFile -Append } -return $STATUS_SUCCESS +# THE FIX: Return must be outside the try/catch/finally blocks +return $script_final_status From ffe7b01ad16a050dba007fb624537abbe40abe5c Mon Sep 17 00:00:00 2001 From: Tony Mocanu <64985430+anmocanu@users.noreply.github.com> Date: Wed, 27 May 2026 11:28:40 +0300 Subject: [PATCH 2/2] Update win-chkdsk script to v1.1 .VERSION v1.1: [May 2026] - Updated the script again (current) - Fixed breaking exception when the Hyper-V module is not installed on the host. - Added explicit checking via Get-Module before executing nested VM discovery. - Included advanced Gen2 unlettered EFI fallback and dynamic drive-letter assignment. v1.0: Initial commit. This was the version 1.0 of the script. --- src/windows/win-chkdsk-fs-corruption.ps1 | 171 ++++++++++++++++++----- 1 file changed, 134 insertions(+), 37 deletions(-) diff --git a/src/windows/win-chkdsk-fs-corruption.ps1 b/src/windows/win-chkdsk-fs-corruption.ps1 index e58507e..9da1de5 100644 --- a/src/windows/win-chkdsk-fs-corruption.ps1 +++ b/src/windows/win-chkdsk-fs-corruption.ps1 @@ -1,77 +1,174 @@ -# .SUMMARY -# Runs chkdsk to fix file system corruption. -# Checks if dirty bit has been set and if so, runs a chkdsk.exe on the attached disk. -# Public doc: https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/windows/troubleshoot-check-disk-boot-error -# -# .RESOLVES -# A Windows VM doesn't start. When you check the boot screenshots in Boot diagnostics, you see that the Check Disk process (chkdsk.exe) -# is running with one of the following messages: 1. Scanning and repairing drive (C:) , 2. Checking file system on C: . 
-# If an NTFS error is found in the file system, the dirty bit will set and the disk check application will run to try and fix any corruption. Running it from a rescue VM helps prevent interruptions. - -# 1. Initialize script and helper functions -. .\src\windows\common\setup\init.ps1 -. .\src\windows\common\helpers\Get-Disk-Partitions.ps1 +<# +.SYNOPSIS + Runs chkdsk to fix file system corruption on an attached rescue disk. -# 2. Set Log Path to Public Desktop -$logDir = "C:\Users\Public\Desktop" -$logFile = "$logDir\chkdsk-repair-log.txt" +.DESCRIPTION + This script runs from a rescue VM to check and repair NTFS file system corruption + on all partitions of the attached faulty OS disk. + It performs the following steps: + 1. Enumerates attached partitions via Get-Disk-Partitions. + 2. For each partition with a drive letter, queries the NTFS dirty bit using fsutil. + 3. If the dirty bit is set, runs chkdsk /f to repair file system errors. + 4. Logs full chkdsk output to the log file; only shows key summary lines + (result, errors/fixes, disk space) in stdout to avoid log truncation. -if (-not (Test-Path $logDir)) { - $null = New-Item -ItemType Directory -Path $logDir -Force -} + This resolves VMs stuck at boot showing "Scanning and repairing drive" or + "Checking file system on C:" messages. Running chkdsk from a rescue VM avoids + interruptions that occur when the OS runs it during boot. + +.NOTES + Name: win-chkdsk-fs-corruption.ps1 + Version: 1.1 + Author: Tony.Mocanu@Microsoft.com + +.VERSION + v1.1: [May 2026] - Updated the script again (current) + - Fixed breaking exception when the Hyper-V module is not installed on the host. + - Added explicit checking via Get-Module before executing nested VM discovery. + - Included advanced Gen2 unlettered EFI fallback and dynamic drive-letter assignment. + v1.0: Initial commit. This was the version 1.0 of the script. 
+ +.LINK + https://learn.microsoft.com/en-us/troubleshoot/azure/virtual-machines/windows/troubleshoot-check-disk-boot-error + +.SCENARIO_RECREATION + To recreate a testable dirty-bit scenario on a rescue VM with an attached OS disk: + 1. Create a test VM in Azure and attach its OS disk to a rescue VM. + 2. Set the dirty bit on the attached partition (replace F with actual drive letter): +fsutil dirty set F: + 3. Verify the dirty bit is set: +fsutil dirty query F: + Expected: "Volume - F: is Dirty" + 4. Run the script. It should detect the dirty bit and run chkdsk /f. + 5. After the script completes, verify the dirty bit was cleared: +fsutil dirty query F: + Expected: "Volume - F: is NOT Dirty" + +.EXAMPLE + az vm repair run -g <resource-group> -n <vm-name> --run-id win-chkdsk-fs-corruption --run-on-repair -# Initialize the status variable early +.VERIFICATION + 1. Review the full chkdsk output in the newest log file (the file is written only when chkdsk actually ran): +Get-ChildItem "C:\WindowsAzure\Logs\Plugins\Microsoft.Compute.CustomScriptExtension\chkdsk-repair_*.log" | Sort-Object LastWriteTime -Descending | Select-Object -First 1 | Get-Content + Expected: chkdsk results for each repaired drive; the script output reports "All partitions processed successfully." and return code 0 ($STATUS_SUCCESS). + 2. Verify the dirty bit was cleared on the attached disk (replace F with the disk letter): +fsutil dirty query F: + Expected: "Volume - F: is NOT Dirty" +#> + +# Initialization +. .\src\windows\common\setup\init.ps1 +. .\src\windows\common\helpers\Get-Disk-Partitions-v2.ps1 + +# Log Configuration +$logDir = "C:\WindowsAzure\Logs\Plugins\Microsoft.Compute.CustomScriptExtension" +if (-not (Test-Path $logDir)) { $null = New-Item -ItemType Directory -Path $logDir -Force } +$timestamp = Get-Date -Format "yyyyMMdd_HHmmss" +$logFile = "$logDir\chkdsk-repair_$timestamp.log" + +# Status Tracking $script_final_status = $STATUS_SUCCESS try { - Log-Info "Script execution started. Report: $logFile" | Tee-Object -FilePath $logFile -Append + Log-Info "Script execution started. 
Report: $logFile" + + # Stop nested guest VM if running (only when Hyper-V module/cmdlets are available) + $hyperVModuleAvailable = @(Get-Module -ListAvailable -Name 'Hyper-V').Count -gt 0 + if ($hyperVModuleAvailable -and (Get-Command -Name 'Get-VM' -ErrorAction SilentlyContinue)) { + $guestHyperVVirtualMachine = Get-VM -ErrorAction SilentlyContinue -WarningAction SilentlyContinue + if ($guestHyperVVirtualMachine) { + if ($guestHyperVVirtualMachine.State -eq 'Running') { + Log-Info "Stopping nested guest VM $($guestHyperVVirtualMachine.VMName)" + try { + Stop-VM $guestHyperVVirtualMachine -ErrorAction Stop -Force + } + catch { + Log-Warning "Failed to stop nested guest VM, will continue but may have limited success" + } + } + } + } + else { + Log-Info "Hyper-V module/cmdlets not available on this host -> skipping nested VM discovery" + } - # Wrap in @() to prevent the 'op_Addition' error in Get-Disk-Partitions - $partitionlist = @(Get-Disk-Partitions) + # Step 1 - Enumerate attached partitions + $partitionlist = Get-Disk-Partitions + $rescueDrive = $env:SystemDrive -replace ':', '' if ($null -eq $partitionlist -or $partitionlist.Count -eq 0) { - Log-Warning "No partitions found to check." | Tee-Object -FilePath $logFile -Append + Log-Warning "No partitions found to check." 
} else { foreach ($partition in $partitionlist) { if ($partition -and $partition.DriveLetter) { - + # Skip the rescue VM's own OS drive + if ($partition.DriveLetter -eq $rescueDrive) { + Log-Info "Skipping rescue VM system drive $rescueDrive (own OS)" + continue + } + $letter = $partition.DriveLetter if ($letter -notmatch ":") { $letter = "$letter" + ":" } - Log-Info "Checking drive: $letter" | Tee-Object -FilePath $logFile -Append + Log-Info "Checking drive: $letter" + # Step 2 - Query the NTFS dirty bit using fsutil $dirtyFlag = fsutil dirty query $letter - Log-Output "FSUTIL Output: $dirtyFlag" | Tee-Object -FilePath $logFile -Append + Log-Output "FSUTIL Output: $dirtyFlag" + # Step 3 - If dirty bit is set, run chkdsk /f to repair file system errors if ($dirtyFlag -notmatch "NOT Dirty") { - Log-Warning "02 - $letter dirty bit set -> running chkdsk /f" | Tee-Object -FilePath $logFile -Append + Log-Warning "$letter dirty bit set -> running chkdsk /f" - $chkdskResults = chkdsk $letter /f 2>&1 | Where-Object { - $str = $_.ToString() - $str -notmatch "Progress:" -and $str -notmatch "Stage:" -and $str -notmatch "Total:" + # Capture all chkdsk output + $chkdskResults = chkdsk $letter /f 2>&1 + + # Write full output to log file only (not stdout) for detailed review + foreach ($line in $chkdskResults) { + $str = $line.ToString() + if ($str.Trim()) { + Add-Content -Path $logFile -Value $str + } } + # Extract only the key summary lines for stdout + # Keep: result lines, error/fix lines, and the final disk space summary block + $summaryLines = @() + $inSummary = $false foreach ($line in $chkdskResults) { - if ($line) { - Log-Output $line | Tee-Object -FilePath $logFile -Append + $str = $line.ToString().Trim() + if (-not $str) { continue } + # Start capturing disk space summary at "total disk space" + if ($str -match 'total disk space') { $inSummary = $true } + if ($inSummary) { + $summaryLines += $str + continue } + # Keep important result/action lines, skip verbose 
progress + if ($str -match '(no problems|correcting|replacing|deleting|recovering|inserting|truncating|adjusting|resetting|Windows has|No further action|Cleaning up|could not fix|Errors detected|corrupt|found no)') { + $summaryLines += $str + } + } + + foreach ($sl in $summaryLines) { + Log-Output $sl } } else { - Log-Info "02 - $letter dirty bit not set -> skipping" | Tee-Object -FilePath $logFile -Append + Log-Info "$letter dirty bit not set -> skipping" } } } } - Log-Info "All partitions processed successfully." | Tee-Object -FilePath $logFile -Append + Log-Info "All partitions processed successfully." } catch { - Log-Error "An error occurred: $($_.Exception.Message)" | Tee-Object -FilePath $logFile -Append + Log-Error "An error occurred: $($_.Exception.Message)" $script_final_status = $STATUS_ERROR } finally { - Log-Info "Script ended at $(Get-Date)" | Tee-Object -FilePath $logFile -Append + Log-Info "Script ended at $(Get-Date)" } # THE FIX: Return must be outside the try/catch/finally blocks