Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 269 additions & 3 deletions app/api/markdown/[[...slug]]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,269 @@ export const dynamic = "force-dynamic";
// Regex pattern for removing .md extension
// Matches a literal ".md" only at the very end of the input string.
const MD_EXTENSION_REGEX = /\.md$/;

// Regex patterns for MDX to Markdown compilation (top-level for performance)
// Frontmatter block at the very start of the input: an opening "---" line, a lazily
// matched body (capture group 1), and a closing "---" line with an optional trailing
// newline. Only "\n" line endings are recognized by this pattern.
const FRONTMATTER_REGEX = /^---\n([\s\S]*?)\n---\n?/;
// `import <bindings> from "module"` (semicolon optional). The bindings are matched
// with ".", which does not cross newlines.
const IMPORT_FROM_REGEX = /^import\s+.*?from\s+['"].*?['"];?\s*$/gm;
// Side-effect import: `import "module"` (semicolon optional).
const IMPORT_DIRECT_REGEX = /^import\s+['"].*?['"];?\s*$/gm;
// `import { ... } from "module"` where the braces' content may span several lines.
const IMPORT_DESTRUCTURE_REGEX =
/^import\s*\{[\s\S]*?\}\s*from\s*['"].*?['"];?\s*$/gm;
// Export declaration starting a line with `export const|let|var|function|default`.
// Consumes lazily up to (not including) the first newline that is followed by
// "import", "export", "#", another newline, or an end-of-line/end-of-input position.
// The "import"/"export" terminators are prefix matches, not whole words.
const EXPORT_REGEX =
/^export\s+(const|let|var|function|default)\s+[\s\S]*?(?=\n(?:import|export|#|\n|$))/gm;
// JSX attribute pattern that properly handles:
// - Quoted strings containing ">" characters
// - JSX expressions in curly braces containing ">" (arrow functions, comparisons)
// - Multiline attributes (newlines allowed between attributes)
// - Up to 3 levels of brace nesting for style={{outer: {inner: 1}}} patterns
// The brace pattern is built level by level because JS regexes cannot recurse.
//
// Inside each repeated group the non-brace alternative consumes exactly ONE
// character. Repeating a starred alternative (`(?:[^{}]*|...)*`) lets the engine
// split a run of plain characters in exponentially many ways, so an unbalanced
// "{" followed by a long brace-free run would cause catastrophic backtracking.
// `(?:[^{}]|...)*` accepts the same strings with a single way to match them.
const NON_BRACE_CHAR = "[^{}]";
const BRACE_CONTENT_L0 = `${NON_BRACE_CHAR}*`; // Innermost: no braces
const BRACE_CONTENT_L1 = `(?:${NON_BRACE_CHAR}|\\{${BRACE_CONTENT_L0}\\})*`; // 1 level
const BRACE_CONTENT_L2 = `(?:${NON_BRACE_CHAR}|\\{${BRACE_CONTENT_L1}\\})*`; // 2 levels
const BRACE_PATTERN = `\\{${BRACE_CONTENT_L2}\\}`; // Full brace expression (supports 3 levels)
// Attribute area of a tag: plain characters, quoted strings, or brace expressions.
const JSX_ATTRS_PATTERN = `(?:[^>"'{}]|"[^"]*"|'[^']*'|${BRACE_PATTERN})*`;
// <Component ... /> where the tag name starts with an uppercase letter and may
// contain dots. Group 1 = tag name.
const SELF_CLOSING_JSX_REGEX = new RegExp(
  `<([A-Z][a-zA-Z0-9.]*)${JSX_ATTRS_PATTERN}\\/>`,
  "g"
);
// <Component ...>children</Component>. Group 1 = tag name (back-referenced by the
// closing tag), group 2 = lazily matched children.
const JSX_WITH_CHILDREN_REGEX = new RegExp(
  `<([A-Z][a-zA-Z0-9.]*)${JSX_ATTRS_PATTERN}>([\\s\\S]*?)<\\/\\1>`,
  "g"
);
// Fenced code block: the shortest span between two ``` markers (may span lines).
const CODE_BLOCK_REGEX = /```[\s\S]*?```/g;
// Single-level brace expression: "{", one or more characters other than "}", then "}".
const JSX_EXPRESSION_REGEX = /\{[^}]+\}/g;
// Runs of three or more consecutive newlines.
const EXCESSIVE_NEWLINES_REGEX = /\n{3,}/g;
// Placeholder token of the form __CODE_BLOCK_<n>__; group 1 captures the index <n>.
const CODE_BLOCK_PLACEHOLDER_REGEX = /__CODE_BLOCK_(\d+)__/g;

// Regex for detecting markdown list items and numbered lists
// Both are anchored at the start of the tested string and require one whitespace
// character after the marker.
// Unordered marker: "-", "*" or "+".
const UNORDERED_LIST_REGEX = /^[-*+]\s/;
// Ordered marker: one or more digits followed by "." or ")".
const ORDERED_LIST_REGEX = /^\d+[.)]\s/;

// Regex for extracting frontmatter fields
// Handles: "double quoted", 'single quoted', or unquoted values
// Group 1 = double-quoted content, Group 2 = single-quoted content, Group 3 = unquoted/fallback
// Quoted patterns require the closing quote at end of line to prevent apostrophes being misread as delimiters
//
// The "m" flag makes "^" and "$" line-relative, which gives two guarantees:
// - the key must start a line, so "subtitle:" or a "title:" appearing inside
//   another field's value is never mistaken for the field itself
// - the quoted alternatives can match on any line of the frontmatter, not only
//   when the field happens to be the last thing in the string
// Whitespace after the colon is limited to spaces/tabs so that an empty value
// produces no match instead of swallowing the following line.
const TITLE_REGEX =
  /^title:[ \t]*(?:"([^"]*)"[ \t]*$|'([^']*)'[ \t]*$|([^\n]+))/m;
const DESCRIPTION_REGEX =
  /^description:[ \t]*(?:"([^"]*)"[ \t]*$|'([^']*)'[ \t]*$|([^\n]+))/m;

// Matches the run of spaces and/or tabs at the very start of a line
const LEADING_WHITESPACE_REGEX = /^[ \t]+/;

/**
 * Strips the indentation shared by every meaningful line of `text`.
 *
 * Content lifted out of nested JSX components is usually indented as a whole;
 * this shifts it back to the left margin. Blank lines and code fence lines
 * (starting with ```) do not take part in working out the shared indentation.
 *
 * @param text - Multi-line text using "\n" as the line separator.
 * @returns The text with the shared indentation removed, or `text` itself when
 * there is nothing to remove.
 */
function dedent(text: string): string {
  // Number of leading space/tab characters on a line
  const indentOf = (row: string): number => {
    const hit = LEADING_WHITESPACE_REGEX.exec(row);
    return hit ? hit[0].length : 0;
  };
  const isFence = (body: string): boolean => body.startsWith("```");

  const rows = text.split("\n");

  // Smallest indentation among lines that are neither blank nor fence markers
  const shared = rows.reduce((smallest, row) => {
    const body = row.trim();
    if (body === "" || isFence(body)) {
      return smallest;
    }
    return Math.min(smallest, indentOf(row));
  }, Number.POSITIVE_INFINITY);

  // Nothing measurable, or already flush left: hand the input back untouched
  if (shared === 0 || !Number.isFinite(shared)) {
    return text;
  }

  const shifted: string[] = [];
  for (const row of rows) {
    const body = row.trim();
    if (body === "" || indentOf(row) < shared) {
      // Blank lines, and lines indented less than the shared amount (only fence
      // lines can be), simply lose all their leading whitespace
      shifted.push(row.trimStart());
    } else if (isFence(body)) {
      // Fence markers always end up at column 0
      shifted.push(body);
    } else {
      shifted.push(row.slice(shared));
    }
  }
  return shifted.join("\n");
}

/**
 * Strips one pair of matching surrounding quotes from a value, if present.
 * Used for unquoted fallback values that may still carry their quotes because
 * the quoted regex alternatives rejected them (e.g. due to an inner apostrophe).
 *
 * @param value - Raw value; leading and trailing whitespace is ignored.
 * @returns The trimmed value without its surrounding quote pair. A value that
 * consists of a single quote character is returned as-is: one character cannot
 * be both the opening and the closing quote.
 */
function stripSurroundingQuotes(value: string): string {
  const trimmed = value.trim();
  // Fewer than two characters cannot form an opening + closing pair
  if (trimmed.length < 2) {
    return trimmed;
  }

  const first = trimmed.charAt(0);
  const last = trimmed.charAt(trimmed.length - 1);
  const isQuote = first === '"' || first === "'";
  if (isQuote && first === last) {
    return trimmed.slice(1, -1);
  }
  return trimmed;
}

/**
 * Reads the `title` and `description` fields out of a frontmatter string.
 *
 * Each field is looked up with its regex, whose capture groups are:
 * 1 = double-quoted value, 2 = single-quoted value, 3 = unquoted fallback.
 * The first group that took part in the match wins; only the fallback group is
 * passed through `stripSurroundingQuotes`.
 *
 * @param frontmatter - Frontmatter text to search.
 * @returns The extracted fields. An empty or missing title becomes
 * "Arcade Documentation"; a missing description becomes "".
 */
function extractFrontmatterMeta(frontmatter: string): {
  title: string;
  description: string;
} {
  const readField = (pattern: RegExp): string => {
    const groups: (string | undefined)[] = frontmatter.match(pattern) ?? [];
    const [, doubleQuoted, singleQuoted, fallback] = groups;
    if (doubleQuoted !== undefined) {
      return doubleQuoted;
    }
    if (singleQuoted !== undefined) {
      return singleQuoted;
    }
    // Fallback values may still be wrapped in quotes; remove one pair
    return stripSurroundingQuotes(fallback ?? "");
  };

  const description = readField(DESCRIPTION_REGEX);
  const title = readField(TITLE_REGEX) || "Arcade Documentation";

  return { title, description };
}

/**
 * Normalizes indentation in the final output.
 *
 * - Code fence lines (starting with ```) are moved to column 0 and toggle the
 *   "inside code block" state.
 * - Lines inside a code block are passed through untouched.
 * - Outside code blocks, list items and blockquotes that start with a space keep
 *   their nesting, snapped down to a multiple of two spaces (one level per two
 *   leading whitespace characters). A single leading space therefore collapses
 *   to no indentation.
 * - Every other line loses its leading whitespace.
 *
 * @param text - Markdown text using "\n" as the line separator.
 * @returns The text with indentation normalized line by line.
 */
function normalizeIndentation(text: string): string {
  // One nesting level in the output. Two spaces is the minimum that keeps a
  // "- " child item nested under its parent; a single space would flatten it.
  const INDENT_UNIT = "  ";

  const finalLines: string[] = [];
  let inCodeBlock = false;

  for (const line of text.split("\n")) {
    const trimmed = line.trimStart();

    if (trimmed.startsWith("```")) {
      inCodeBlock = !inCodeBlock;
      finalLines.push(trimmed); // Code block markers should start at column 0
      continue;
    }

    if (inCodeBlock) {
      finalLines.push(line); // Preserve indentation inside code blocks
      continue;
    }

    // Preserve indentation for nested list items and blockquotes
    const isListItem =
      UNORDERED_LIST_REGEX.test(trimmed) || ORDERED_LIST_REGEX.test(trimmed);
    const isBlockquote = trimmed.startsWith(">");

    if ((isListItem || isBlockquote) && line.startsWith(" ")) {
      // Keep markdown-meaningful indentation, normalized to 2-space increments
      const leadingSpaces = line.length - trimmed.length;
      const levels = Math.floor(leadingSpaces / 2);
      finalLines.push(INDENT_UNIT.repeat(levels) + trimmed);
    } else {
      finalLines.push(trimmed); // Remove leading whitespace for other lines
    }
  }

  return finalLines.join("\n");
}

// Inline code span written with single backticks on one line, e.g. `{user_id}`
const INLINE_CODE_REGEX = /`[^`\n]+`/g;

/**
 * Removes the opening fence's indentation from the rest of a fenced code block.
 *
 * @param block - Full fenced block text, starting at the opening ``` marker.
 * @param fenceIndent - Number of whitespace characters that preceded the opening
 * fence on its line.
 * @returns The block with up to `fenceIndent` leading spaces/tabs removed from
 * every line after the first; lines indented less lose only what they have.
 */
function unindentFencedBlock(block: string, fenceIndent: number): string {
  if (fenceIndent === 0) {
    return block;
  }
  return block
    .split("\n")
    .map((line, index) => {
      if (index === 0) {
        return line; // The opening fence itself starts at the match offset
      }
      let cut = 0;
      while (
        cut < fenceIndent &&
        (line.charAt(cut) === " " || line.charAt(cut) === "\t")
      ) {
        cut += 1;
      }
      return line.slice(cut);
    })
    .join("\n");
}

/**
 * Compiles MDX content to clean markdown by:
 * - Preserving frontmatter
 * - Shielding fenced code blocks and inline code spans so that nothing below
 *   can alter code samples
 * - Removing import and export statements
 * - Converting JSX components to their text content
 * - Removing leftover `{expression}` blocks
 * - Preserving standard markdown
 * - Providing fallback content for component-only pages
 *
 * @param content - Raw MDX source of the page.
 * @param pagePath - Path appended to "https://docs.arcade.dev" to build the
 * link used in the fallback text.
 * @returns Markdown text ending in a newline, prefixed by the original
 * frontmatter when there was one.
 */
function compileMdxToMarkdown(content: string, pagePath: string): string {
  let result = content;

  // Extract and preserve frontmatter if present
  let frontmatter = "";
  const frontmatterMatch = result.match(FRONTMATTER_REGEX);
  if (frontmatterMatch) {
    frontmatter = frontmatterMatch[0];
    result = result.slice(frontmatter.length);
  }

  // Swap all code for placeholders BEFORE stripping MDX syntax. Code samples
  // routinely contain import statements, JSX tags and braces; they must reach
  // the output untouched.
  const codeBlocks: string[] = [];
  const protect = (code: string): string => {
    codeBlocks.push(code);
    return `__CODE_BLOCK_${codeBlocks.length - 1}__`;
  };
  result = result.replace(
    CODE_BLOCK_REGEX,
    (block: string, offset: number, whole: string) => {
      // Fences nested in indented JSX carry that indentation on every line.
      // Remove it here, since the block is opaque to dedent() from now on.
      const lineStart = whole.lastIndexOf("\n", offset - 1) + 1;
      const prefix = whole.slice(lineStart, offset);
      const fenceIndent = prefix.trim() === "" ? prefix.length : 0;
      return protect(unindentFencedBlock(block, fenceIndent));
    }
  );
  // Fenced blocks are placeholders by now, so this only sees inline spans
  result = result.replace(INLINE_CODE_REGEX, (span: string) => protect(span));

  // Remove import statements (various formats)
  result = result.replace(IMPORT_FROM_REGEX, "");
  result = result.replace(IMPORT_DIRECT_REGEX, "");
  result = result.replace(IMPORT_DESTRUCTURE_REGEX, "");

  // Remove export statements (like export const metadata)
  result = result.replace(EXPORT_REGEX, "");

  // Process self-closing JSX components (e.g., <Component /> or <Component prop="value" />)
  // Handles components with dots like <GuideOverview.Item />
  result = result.replace(SELF_CLOSING_JSX_REGEX, "");

  // Process JSX components with children - extract the text content
  // Handles components with dots like <Tabs.Tab>content</Tabs.Tab>
  // Each pass unwraps one layer, so repeat until a pass changes nothing
  let previousResult = "";
  while (previousResult !== result) {
    previousResult = result;
    // Apply dedent to each extracted piece to normalize indentation
    result = result.replace(
      JSX_WITH_CHILDREN_REGEX,
      (_match: string, _tag: string, innerContent: string) =>
        dedent(innerContent.trim())
    );
  }

  // Remove any remaining JSX expressions like {variable} or {expression}
  result = result.replace(JSX_EXPRESSION_REGEX, "");

  // Restore code (keep the placeholder text if its index is unknown)
  result = result.replace(
    CODE_BLOCK_PLACEHOLDER_REGEX,
    (match: string, index: string) =>
      codeBlocks[Number.parseInt(index, 10)] ?? match
  );

  // Normalize indentation (remove stray whitespace, preserve meaningful markdown indentation)
  result = normalizeIndentation(result);

  // Clean up excessive blank lines (more than 2 consecutive)
  result = result.replace(EXCESSIVE_NEWLINES_REGEX, "\n\n");

  // Trim leading/trailing whitespace
  result = result.trim();

  // If content is essentially empty (component-only page), provide fallback
  if (result.length < 10) {
    const { title, description } = extractFrontmatterMeta(frontmatter);
    const htmlUrl = `https://docs.arcade.dev${pagePath}`;
    // Skip the description paragraph when there is none, so the fallback does
    // not contain a run of blank lines
    const paragraphs = [
      `# ${title}`,
      description,
      `This page contains interactive content. Visit the full page at: ${htmlUrl}`,
    ].filter((paragraph) => paragraph !== "");
    return `${frontmatter}${paragraphs.join("\n\n")}\n`;
  }

  // Reconstruct with frontmatter
  return `${frontmatter}${result}\n`;
}

export async function GET(
request: NextRequest,
_context: { params: Promise<{ slug?: string[] }> }
Expand All @@ -31,13 +294,16 @@ export async function GET(
return new NextResponse("Markdown file not found", { status: 404 });
}

const content = await readFile(filePath, "utf-8");
const rawContent = await readFile(filePath, "utf-8");

// Compile MDX to clean markdown
const content = compileMdxToMarkdown(rawContent, pathWithoutMd);

// Return the raw markdown with proper headers
// Return the compiled markdown with proper headers
return new NextResponse(content, {
status: 200,
headers: {
"Content-Type": "text/plain; charset=utf-8",
"Content-Type": "text/markdown; charset=utf-8",
"Content-Disposition": "inline",
},
});
Expand Down
7 changes: 7 additions & 0 deletions app/layout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ export default async function RootLayout({
<meta content="width=device-width, initial-scale=1" name="viewport" />
<link href="https://www.googletagmanager.com" rel="preconnect" />
<link href="https://www.googletagmanager.com" rel="dns-prefetch" />
{pathname !== "/" && (
<link
href={`https://docs.arcade.dev${pathname}.md`}
rel="alternate"
type="text/markdown"
/>
)}
</Head>
<body>
{lang !== "en" && (
Expand Down