Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions .cspell.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"$schema": "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json",
"version": "0.2",
"language": "en",
"language": "en,en-GB",
"dictionaries": ["csharp", "typescript", "powershell", "npm", "dotnet", "bash", "markdown"],
"enableGlobDot": true,
"minWordLength": 4,
Expand Down Expand Up @@ -69,6 +69,7 @@
"methodimpl",
"iequatable",
"IEquatable",
"IMGUI",
"inlinable",
"inlines",
"customisation",
Expand Down Expand Up @@ -97,6 +98,7 @@
"pygments",
"kwds",
"arithmatex",
"apos",
"linenums",
"inlinehilite",
"cairosvg",
Expand Down Expand Up @@ -173,6 +175,7 @@
"Slru",
"RAII",
"raii",
"rvalue",
"Configurator",
"Initializable",
"Hipple",
Expand Down Expand Up @@ -204,6 +207,7 @@
"Fira",
"APFS",
"NTFS",
"ENOENT",
"nocasematch",
"émoji",
"directx",
Expand All @@ -213,8 +217,35 @@
"llmstxt",
"llms",
"LLMS",
"callvirt",
"Callvirt",
"customised",
"dedup",
"dedup'd",
"Dedups",
"desync",
"unmatch"
"fqns",
"Indirected",
"initialised",
"ldsfld",
"Ldsfld",
"ldstr",
"Ldstr",
"materialised",
"misaligning",
"normalise",
"Normalise",
"normalises",
"ordinally",
"recognised",
"recompiles",
"reentrancy",
"serialisable",
"unconfigured",
"unrecognised",
"Unrecognised",
"unmatch",
"unstubbed"
],
"ignoreRegExpList": [
"/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}/g",
Expand Down
21 changes: 17 additions & 4 deletions .github/scripts/check-markdown-links.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ function Normalize-Name {
}

$issueCount = 0
$scannedFileCount = 0
$issuesByFile = @{}

# Exclude typical directories that shouldn't be scanned
$excludeDirs = @('.git', 'node_modules', '.vs', '.venv', '.artifacts', 'site', 'Library', 'Obj', 'Temp', 'Samples~')
Expand All @@ -28,6 +30,7 @@ $mdFiles = Get-ChildItem -Path $Root -Recurse -File -Filter *.md |
$pattern = '(?<!\!)\[(?<text>[^\]]+)\]\((?<target>[^)\s]+)(?:\s+"[^"]*")?\)'

foreach ($file in $mdFiles) {
$scannedFileCount++
$lines = Get-Content -LiteralPath $file.FullName -Encoding UTF8
$inCodeBlock = $false
$codeFencePattern = $null
Expand All @@ -37,11 +40,12 @@ foreach ($file in $mdFiles) {

# Skip fenced code blocks
$trimmedLine = $line.TrimStart()
if ($trimmedLine -match '^(`{3,})') {
$trimmedFenceLine = $trimmedLine.Trim()
if ($trimmedLine -match '^(?<fence>`{3,}|~{3,})') {
if (-not $inCodeBlock) {
$inCodeBlock = $true
$codeFencePattern = $Matches[1]
} elseif ($trimmedLine.StartsWith($codeFencePattern) -and $trimmedLine.Trim() -match "^$([regex]::Escape($codeFencePattern))") {
$codeFencePattern = $Matches['fence']
} elseif ($trimmedFenceLine -eq $codeFencePattern) {
$inCodeBlock = $false
$codeFencePattern = $null
}
Expand Down Expand Up @@ -82,6 +86,10 @@ foreach ($file in $mdFiles) {

if ($isExactFileName -or $looksLikePath -or $looksLikeMarkdownFileName) {
$issueCount++
if (-not $issuesByFile.ContainsKey($file.FullName)) {
$issuesByFile[$file.FullName] = 0
}
$issuesByFile[$file.FullName]++
$lineNo = $i + 1
$msg = "Link text '$text' should be human-readable, not a raw file name or path"
# GitHub Actions annotation
Expand All @@ -92,10 +100,15 @@ foreach ($file in $mdFiles) {
}

if ($issueCount -gt 0) {
Write-Host "Scanned $scannedFileCount markdown file(s) under '$Root'." -ForegroundColor Yellow
Write-Host "Issue count by file:" -ForegroundColor Yellow
foreach ($entry in ($issuesByFile.GetEnumerator() | Sort-Object Name)) {
Write-Host " - $($entry.Name): $($entry.Value)" -ForegroundColor Yellow
}
Write-Host "Found $issueCount documentation link(s) with non-human-readable text." -ForegroundColor Red
Write-Host "Use a descriptive phrase instead of the raw file name."
exit 1
}
else {
Write-Host "All markdown links have human-readable text."
Write-Host "Scanned $scannedFileCount markdown file(s); all markdown-to-markdown links use human-readable text."
}
100 changes: 70 additions & 30 deletions .github/scripts/check_markdown_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,18 @@
import urllib.parse


EXCLUDE_DIRS = {".git", "node_modules", ".vs"}
EXCLUDE_DIRS = {
".git",
"node_modules",
".vs",
".venv",
".artifacts",
"site",
"Library",
"Obj",
"Temp",
"Samples~",
}


LINK_RE = re.compile(r"(?<!\!)\[(?P<text>[^\]]+)\]\((?P<target>[^)\s]+)(?:\s+\"[^\"]*\")?\)")
Expand Down Expand Up @@ -66,17 +77,24 @@ def check_code_fence(stripped_line: str, in_code_block: bool, code_fence_pattern
Returns:
Tuple of (new_in_code_block, new_code_fence_pattern, is_fence_line)
"""
if not stripped_line.startswith("```"):
if not stripped_line:
return in_code_block, code_fence_pattern, False

# Count the backticks at the start
backtick_count = 0
fence_char = stripped_line[0]
if fence_char not in ("`", "~"):
return in_code_block, code_fence_pattern, False

if not stripped_line.startswith(fence_char * 3):
return in_code_block, code_fence_pattern, False

# Count the fence characters at the start.
fence_count = 0
for ch in stripped_line:
if ch == "`":
backtick_count += 1
if ch == fence_char:
fence_count += 1
else:
break
fence = "`" * backtick_count
fence = fence_char * fence_count

if not in_code_block:
# Entering a code block
Expand Down Expand Up @@ -148,37 +166,59 @@ def check_file_content(lines: list) -> list:
return issues


def main(root: str) -> int:
issues = 0
def iter_markdown_files(root: str):
"""Yield markdown files under root in deterministic order."""
for dirpath, dirnames, filenames in os.walk(root):
# prune excluded directories
dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
for filename in filenames:
if not filename.lower().endswith(".md"):
continue
path = os.path.join(dirpath, filename)
try:
with open(path, "r", encoding="utf-8") as f:
lines = f.readlines()
except Exception:
# Skip files that cannot be read (permission errors, encoding issues, etc.)
continue

file_issues = check_file_content(lines)
for line_num, text, target in file_issues:
issues += 1
msg = f"{path}:{line_num}: Link text '{text}' should be human-readable, not a raw file name or path (target: {target})"
print(msg)

if issues:
# Prune excluded directories and sort for deterministic output across platforms.
dirnames[:] = sorted(d for d in dirnames if d not in EXCLUDE_DIRS)
for filename in sorted(filenames):
if filename.lower().endswith(".md"):
yield os.path.join(dirpath, filename)


def main(root: str) -> int:
    """Scan markdown files under *root* and report non-human-readable link text.

    Prints one line per offending link, then (on failure) a per-file issue
    summary to stderr. Returns 1 when at least one issue is found, else 0.
    """
    issue_count = 0
    scanned_files = 0
    file_issue_counts = {}

    for path in iter_markdown_files(root):
        scanned_files += 1
        try:
            with open(path, "r", encoding="utf-8") as f:
                lines = f.readlines()
        except Exception:
            # Skip files that cannot be read (permission errors, encoding issues, etc.)
            continue

        file_issues = check_file_content(lines)
        if file_issues:
            file_issue_counts[path] = len(file_issues)

        for line_num, text, target in file_issues:
            issue_count += 1
            msg = f"{path}:{line_num}: Link text '{text}' should be human-readable, not a raw file name or path (target: {target})"
            print(msg)

    if issue_count:
        print(
            f"Scanned {scanned_files} markdown file(s) under '{root}'.",
            file=sys.stderr,
        )
        print("Issue count by file:", file=sys.stderr)
        for path, count in sorted(file_issue_counts.items()):
            print(f"  - {path}: {count}", file=sys.stderr)
        print(
            f"Found {issue_count} documentation link(s) with non-human-readable text.",
            file=sys.stderr,
        )
        print(
            "Use a descriptive phrase instead of the raw file name.", file=sys.stderr
        )
        return 1

    print(
        f"Scanned {scanned_files} markdown file(s); all markdown-to-markdown links use human-readable text."
    )
    return 0


Expand Down
68 changes: 62 additions & 6 deletions .github/scripts/check_markdown_url_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@


# Inline markdown link or image: ![alt](target "title") or [text](target "title")
INLINE_LINK_RE = re.compile(
r"!?(?P<all>\[(?P<text>[^\]]+)\]\((?P<target>[^)\s]+)(?:\s+\"[^\"]*\")?\))"
)
INLINE_LINK_RE = re.compile(r"!?\[[^\]]+\]\((?P<body>[^)]*)\)")

# Reference-style link definitions: [id]: target "title"
REF_DEF_RE = re.compile(r"^\s*\[[^\]]+\]:\s*(?P<target>\S+)(?:\s+\"[^\"]*\")?\s*$")
# Ignore PowerShell static-member syntax like [System.IO.File]::WriteAllText(...)
# by rejecting a second colon immediately after the delimiter colon.
REF_DEF_RE = re.compile(r"^\s*\[[^\]]+\]:\s*(?!:)(?P<body>.+?)\s*$")

# Optional quoted title suffix used by both inline and reference-style links.
TITLE_SUFFIX_RE = re.compile(r'^(?P<target>.+?)(?:\s+"[^"]*")?\s*$')


def is_external(target: str) -> bool:
Expand All @@ -25,6 +28,47 @@ def has_unencoded_chars(target: str) -> bool:
return (" " in target) or ("+" in target)


def extract_target(raw_body: str) -> str:
    """Extract the link target from a markdown link body that may include a quoted title.

    ``raw_body`` is the text between the parentheses of an inline link (or the
    remainder of a reference-style definition), e.g. ``path/file.md "Title"``.
    Returns the target with any trailing quoted title removed, or ``""`` when
    the body is blank.
    """
    body = raw_body.strip()
    if not body:
        return ""

    # Lazily capture the target, then drop an optional trailing quoted title.
    # The pattern always matches a non-empty string (everything after the
    # target is optional), so no None check is needed.
    match = re.match(r'^(?P<target>.+?)(?:\s+"[^"]*")?\s*$', body)
    return match.group("target").strip()


def update_code_fence_state(stripped_line: str, in_code_block: bool, code_fence_pattern: str):
    """Track the fenced-code-block state across markdown lines.

    ``stripped_line`` is the line with leading whitespace removed. Returns a
    tuple ``(in_code_block, code_fence_pattern, is_fence_line)`` giving the
    state after consuming this line. Fences are runs of three or more
    backticks or tildes.
    """
    # Only a line opening with at least three backticks or tildes can change
    # the fence state; anything else (including an empty line) is content.
    if not stripped_line.startswith(("```", "~~~")):
        return in_code_block, code_fence_pattern, False

    # Measure the leading run of the fence character to record its pattern.
    marker = stripped_line[0]
    run = len(stripped_line) - len(stripped_line.lstrip(marker))
    fence = marker * run

    if not in_code_block:
        # Opening fence: remember the exact pattern for the matching close.
        return True, fence, True

    # A closing fence must repeat the opening pattern exactly, allowing only
    # trailing whitespace (the line starts with the marker, so stripping can
    # only remove trailing characters).
    if stripped_line.strip() == code_fence_pattern:
        return False, None, True

    # A different fence-like line inside the block is treated as content.
    return in_code_block, code_fence_pattern, False


def scan_file(path: str) -> int:
issues = 0
try:
Expand All @@ -33,10 +77,22 @@ def scan_file(path: str) -> int:
except Exception:
return 0

in_code_block = False
code_fence_pattern = None

for idx, line in enumerate(lines, start=1):
stripped = line.lstrip()
in_code_block, code_fence_pattern, is_fence = update_code_fence_state(
stripped,
in_code_block,
code_fence_pattern,
)
if is_fence or in_code_block:
continue

# Inline links/images
for m in INLINE_LINK_RE.finditer(line):
target = m.group("target").strip()
target = extract_target(m.group("body"))
if is_external(target):
continue
if has_unencoded_chars(target):
Expand All @@ -46,7 +102,7 @@ def scan_file(path: str) -> int:
# Reference-style link definitions
m = REF_DEF_RE.match(line)
if m:
target = m.group("target").strip()
target = extract_target(m.group("body"))
if not is_external(target) and has_unencoded_chars(target):
issues += 1
print(f"{path}:{idx}: Unencoded character(s) in link definition: '{target}'. Encode spaces as %20 and '+' as %2B.")
Expand Down
Loading
Loading