diff --git a/ts/src/sourcemapper/sourcemapper.ts b/ts/src/sourcemapper/sourcemapper.ts index 4e94446d..91342a57 100644 --- a/ts/src/sourcemapper/sourcemapper.ts +++ b/ts/src/sourcemapper/sourcemapper.ts @@ -49,6 +49,96 @@ function createLimiter(concurrency: number) { } const MAP_EXT = '.map'; +// Per TC39 ECMA-426 §11.1.2.1 JavaScriptExtractSourceMapURL (without parsing): +// https://tc39.es/ecma426/#sec-linking-inline +// +// Split on these line terminators (ECMA-262 LineTerminatorSequence): +const LINE_SPLIT_RE = /\r\n|\n|\r|\u2028|\u2029/; +// Bytes to read from the end of a JS file when scanning for the annotation. +// The annotation must be on the last non-empty line, which is always short +// for external URLs. If that line is not wholly inside the tail we fall back +// to a full file read (handles very large inline data: maps). +export const ANNOTATION_TAIL_BYTES = 4 * 1024; +// Quote code points that invalidate the annotation (U+0022, U+0027, U+0060): +const QUOTE_CHARS_RE = /["'`]/; +// MatchSourceMapURL pattern applied to the comment text that follows "//": +const MATCH_SOURCE_MAP_URL_RE = /^[@#]\s*sourceMappingURL=(\S*?)\s*$/; + +/** + * Extracts a sourceMappingURL from JS source per ECMA-426 §11.1.2.1 + * (without-parsing variant). + * + * Scans lines from the end, skipping empty/whitespace-only lines. + * Returns undefined as soon as the first non-empty line is found that does not + * carry a valid annotation — the URL must be on the last non-empty line. + */ +export function extractSourceMappingURL(content: string): string | undefined { + const lines = content.split(LINE_SPLIT_RE); + for (let i = lines.length - 1; i >= 0; i--) { + const line = lines[i]; + if (line.trim() === '') continue; // skip empty / whitespace-only lines + + // This is the last non-empty line; it must carry the annotation or we stop. 
+ const commentStart = line.indexOf('//'); + if (commentStart === -1) return undefined; + + const comment = line.slice(commentStart + 2); + if (QUOTE_CHARS_RE.test(comment)) return undefined; + + const match = MATCH_SOURCE_MAP_URL_RE.exec(comment); + return match ? match[1] || undefined : undefined; + } + return undefined; +} + +/** + * Reads the sourceMappingURL from a JS file efficiently by only reading a + * small tail of the file. + * + * The annotation must be on the last non-empty line (ECMA-426), so as long as + * the tail contains a line terminator preceding that line, the line is fully + * captured. Otherwise the line starts before the tail (typically one very + * long inline data: line); we fall back to a full file read in + * that case. + */ +export async function readSourceMappingURL( + filePath: string, +): Promise { + const fd = await fs.promises.open(filePath, 'r'); + try { + const {size} = await fd.stat(); + if (size === 0) return undefined; + + const tailSize = Math.min(ANNOTATION_TAIL_BYTES, size); + const buf = Buffer.allocUnsafe(tailSize); + await fd.read(buf, 0, tailSize, size - tailSize); + const tail = buf.toString('utf8'); + + // The last non-empty line is fully captured in the tail if and only if a + // line terminator that precedes it also falls within the tail — i.e. the + // last non-empty segment is not the very first element of the split result. + // + // Counter-example: a large inline map followed by trailing empty lines. + // The tail might be "...base64...\n\n", which contains line terminators + // but whose last non-empty content ("...base64...") is the first + // segment — it extends before the window. Testing for a LINE_SPLIT_RE match alone + // would incorrectly accept this tail. 
+ const lines = tail.split(LINE_SPLIT_RE); + let lastNonEmptyIdx = lines.length - 1; + while (lastNonEmptyIdx > 0 && lines[lastNonEmptyIdx].trim() === '') { + lastNonEmptyIdx--; + } + if (tailSize === size || lastNonEmptyIdx > 0) { + return extractSourceMappingURL(tail); + } + + const fullContent = await readFile(filePath, 'utf8'); + return extractSourceMappingURL(fullContent); + } finally { + await fd.close(); + } +} + function error(msg: string) { logger.debug(`Error: ${msg}`); return new Error(msg); @@ -99,27 +189,6 @@ async function processSourceMap( throw error('Could not read source map file ' + mapPath + ': ' + e); } - let consumer: sourceMap.RawSourceMap; - try { - // TODO: Determine how to reconsile the type conflict where `consumer` - // is constructed as a SourceMapConsumer but is used as a - // RawSourceMap. - // TODO: Resolve the cast of `contents as any` (This is needed because the - // type is expected to be of `RawSourceMap` but the existing - // working code uses a string.) - consumer = (await new sourceMap.SourceMapConsumer( - contents as {} as sourceMap.RawSourceMap, - )) as {} as sourceMap.RawSourceMap; - } catch (e) { - throw error( - 'An error occurred while reading the ' + - 'sourceMap file ' + - mapPath + - ': ' + - e, - ); - } - /* If the source map file defines a "file" attribute, use it as * the output file where the path is relative to the directory * containing the map file. Otherwise, use the name of the output @@ -137,9 +206,22 @@ async function processSourceMap( * source map file. */ const dir = path.dirname(mapPath); - const generatedPathCandidates = []; - if (consumer.file) { - generatedPathCandidates.push(path.resolve(dir, consumer.file)); + + // Parse JSON once: extract the `file` property for early-exit checks and + // reuse the parsed object when constructing SourceMapConsumer (avoids a + // second parse inside the library). 
+ let parsedMap: sourceMap.RawSourceMap | undefined; + let rawFile: string | undefined; + try { + parsedMap = JSON.parse(contents) as sourceMap.RawSourceMap; + rawFile = parsedMap.file; + } catch { + // Fails again below if SourceMapConsumer creation is reached; let that throw. + } + + const generatedPathCandidates: string[] = []; + if (rawFile) { + generatedPathCandidates.push(path.resolve(dir, rawFile)); } const samePath = path.resolve(dir, path.basename(mapPath, MAP_EXT)); if ( @@ -149,22 +231,57 @@ async function processSourceMap( generatedPathCandidates.push(samePath); } - for (const generatedPath of generatedPathCandidates) { - try { - await fs.promises.access(generatedPath, fs.constants.F_OK); - infoMap.set(generatedPath, {mapFileDir: dir, mapConsumer: consumer}); + // Find the first candidate that exists and hasn't been loaded already. + let targetPath: string | undefined; + for (const candidate of generatedPathCandidates) { + if (infoMap.has(candidate)) { + // Already loaded via sourceMappingURL annotation; skip this map file. if (debug) { - logger.debug(`Loaded source map for ${generatedPath} => ${mapPath}`); + logger.debug( + `Skipping ${mapPath}: ${candidate} already loaded via sourceMappingURL`, + ); } return; + } + try { + await fs.promises.access(candidate, fs.constants.F_OK); + targetPath = candidate; + break; } catch { if (debug) { - logger.debug(`Generated path ${generatedPath} does not exist`); + logger.debug(`Generated path ${candidate} does not exist`); } } } + + if (!targetPath) { + if (debug) { + logger.debug(`Unable to find generated file for ${mapPath}`); + } + return; + } + + let consumer: sourceMap.RawSourceMap; + try { + // TODO: Determine how to reconcile the type conflict where `consumer` + // is constructed as a SourceMapConsumer but is used as a + // RawSourceMap. + consumer = (await new sourceMap.SourceMapConsumer( + (parsedMap ?? 
contents) as {} as sourceMap.RawSourceMap, + )) as {} as sourceMap.RawSourceMap; + } catch (e) { + throw error( + 'An error occurred while reading the ' + + 'sourceMap file ' + + mapPath + + ': ' + + e, + ); + } + + infoMap.set(targetPath, {mapFileDir: dir, mapConsumer: consumer}); if (debug) { - logger.debug(`Unable to find generated file for ${mapPath}`); + logger.debug(`Loaded source map for ${targetPath} => ${mapPath}`); } } @@ -176,41 +293,129 @@ export class SourceMapper { searchDirs: string[], debug = false, ): Promise { - if (debug) { - logger.debug( - `Looking for source map files in dirs: [${searchDirs.join(', ')}]`, - ); - } - const mapFiles: string[] = []; + const mapper = new SourceMapper(debug); for (const dir of searchDirs) { - try { - const mf = await getMapFiles(dir); - mf.forEach(mapFile => { - mapFiles.push(path.resolve(dir, mapFile)); - }); - } catch (e) { - throw error(`failed to get source maps from ${dir}: ${e}`); - } - } - if (debug) { - logger.debug(`Found source map files: [${mapFiles.join(', ')}]`); + await mapper.loadDirectory(dir); } - return createFromMapFiles(mapFiles, debug); + return mapper; } - /** - * @param {Array.} sourceMapPaths An array of paths to .map source map - * files that should be processed. The paths should be relative to the - * current process's current working directory - * @param {Logger} logger A logger that reports errors that occurred while - * processing the given source map files - * @constructor - */ constructor(debug = false) { this.infoMap = new Map(); this.debug = debug; } + /** + * Scans `searchDir` recursively for JS files and source map files, loading + * source maps for all JS files found. + * + * Priority for each JS file: + * 1. A map pointed to by a `sourceMappingURL` annotation in the JS file + * (inline `data:` URL or external file path, only if the file exists). + * 2. 
A `.map` file found in the directory scan that claims to belong to + * that JS file (via its `file` property or naming convention). + * + * Safe to call multiple times; already-loaded files are skipped. + */ + async loadDirectory(searchDir: string): Promise { + // Resolve to absolute so all paths in infoMap are consistent regardless of + // whether the caller passed a relative or absolute directory. + searchDir = path.resolve(searchDir); + + if (this.debug) { + logger.debug(`Loading source maps from directory: ${searchDir}`); + } + + const jsFiles: string[] = []; + const mapFiles: string[] = []; + + for await (const entry of walk( + searchDir, + filename => + /\.[cm]?js$/.test(filename) || /\.[cm]?js\.map$/.test(filename), + (root, dirname) => + root !== '/proc' && dirname !== '.git' && dirname !== 'node_modules', + )) { + if (entry.endsWith(MAP_EXT)) { + mapFiles.push(entry); + } else { + jsFiles.push(entry); + } + } + + if (this.debug) { + logger.debug( + `Found ${jsFiles.length} JS files and ${mapFiles.length} map files in ${searchDir}`, + ); + } + + const limit = createLimiter(CONCURRENCY); + + // Phase 1: Check sourceMappingURL annotations in JS files (higher priority). + await Promise.all( + jsFiles.map(jsPath => + limit(async () => { + if (this.infoMap.has(jsPath)) return; + + let url: string | undefined; + try { + url = await readSourceMappingURL(jsPath); + } catch { + return; + } + if (!url) return; + + const INLINE_PREFIX = 'data:application/json;base64,'; + if (url.startsWith(INLINE_PREFIX)) { + const mapContent = Buffer.from( + url.slice(INLINE_PREFIX.length), + 'base64', + ).toString(); + await this.loadMapContent(jsPath, mapContent, path.dirname(jsPath)); + } else { + const mapPath = path.resolve(path.dirname(jsPath), url); + try { + const mapContent = await readFile(mapPath, 'utf8'); + await this.loadMapContent( + jsPath, + mapContent, + path.dirname(mapPath), + ); + } catch { + // Map file doesn't exist or is unreadable; fall through to Phase 2. 
+ } + } + }), + ), + ); + + // Phase 2: Process .map files for any JS files not yet resolved. + await Promise.all( + mapFiles.map(mapPath => + limit(() => processSourceMap(this.infoMap, mapPath, this.debug)), + ), + ); + } + + private async loadMapContent( + jsPath: string, + mapContent: string, + mapDir: string, + ): Promise { + try { + const parsedMap = JSON.parse(mapContent) as sourceMap.RawSourceMap; + const consumer = (await new sourceMap.SourceMapConsumer( + parsedMap, + )) as {} as sourceMap.RawSourceMap; + this.infoMap.set(jsPath, {mapFileDir: mapDir, mapConsumer: consumer}); + if (this.debug) { + logger.debug(`Loaded source map for ${jsPath} via sourceMappingURL`); + } + } catch (e) { + logger.debug(`Failed to parse source map for ${jsPath}: ${e}`); + } + } + /** * Used to get the information about the transpiled file from a given input * source file provided there isn't any ambiguity with associating the input @@ -321,25 +526,6 @@ export class SourceMapper { } } -async function createFromMapFiles( - mapFiles: string[], - debug: boolean, -): Promise { - const limit = createLimiter(CONCURRENCY); - const mapper = new SourceMapper(debug); - const promises: Array> = mapFiles.map(mapPath => - limit(() => processSourceMap(mapper.infoMap, mapPath, debug)), - ); - try { - await Promise.all(promises); - } catch (err) { - throw error( - 'An error occurred while processing the source map files' + err, - ); - } - return mapper; -} - function isErrnoException(e: unknown): e is NodeJS.ErrnoException { return e instanceof Error && 'code' in e; } @@ -382,16 +568,3 @@ async function* walk( yield* walkRecursive(dir); } - -async function getMapFiles(baseDir: string): Promise { - const mapFiles: string[] = []; - for await (const entry of walk( - baseDir, - filename => /\.[cm]?js\.map$/.test(filename), - (root, dirname) => - root !== '/proc' && dirname !== '.git' && dirname !== 'node_modules', - )) { - mapFiles.push(path.relative(baseDir, entry)); - } - return mapFiles; -} 
diff --git a/ts/test/test-profile-serializer.ts b/ts/test/test-profile-serializer.ts index c4461cdd..8f87b0f3 100644 --- a/ts/test/test-profile-serializer.ts +++ b/ts/test/test-profile-serializer.ts @@ -206,8 +206,7 @@ describe('profile-serializer', () => { describe('source map specified', () => { let sourceMapper: SourceMapper; before(async () => { - const sourceMapFiles = [mapDirPath]; - sourceMapper = await SourceMapper.create(sourceMapFiles); + sourceMapper = await SourceMapper.create([mapDirPath]); }); describe('serializeHeapProfile', () => { diff --git a/ts/test/test-sourcemapper.ts b/ts/test/test-sourcemapper.ts new file mode 100644 index 00000000..350df364 --- /dev/null +++ b/ts/test/test-sourcemapper.ts @@ -0,0 +1,274 @@ +/** + * Copyright 2017 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import * as assert from 'assert'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as tmp from 'tmp'; + +import { + ANNOTATION_TAIL_BYTES, + SourceMapper, + extractSourceMappingURL, + readSourceMappingURL, +} from '../src/sourcemapper/sourcemapper'; + +describe('extractSourceMappingURL', () => { + it('returns URL from a standard annotation', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL=foo.js.map\n'), + 'foo.js.map', + ); + }); + + it('accepts legacy //@ prefix', () => { + assert.strictEqual( + extractSourceMappingURL('//@ sourceMappingURL=foo.js.map\n'), + 'foo.js.map', + ); + }); + + it('skips trailing empty and whitespace-only lines', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL=foo.js.map\n\n \n'), + 'foo.js.map', + ); + }); + + it('allows leading whitespace before //', () => { + assert.strictEqual( + extractSourceMappingURL(' //# sourceMappingURL=foo.js.map\n'), + 'foo.js.map', + ); + }); + + it('returns undefined when last non-empty line has no // comment', () => { + assert.strictEqual(extractSourceMappingURL('const x = 1;\n'), undefined); + }); + + it('returns undefined when // comment does not match annotation pattern', () => { + assert.strictEqual( + extractSourceMappingURL('// some other comment\n'), + undefined, + ); + }); + + it('returns undefined (early exit) when last non-empty line is not an annotation, even if earlier lines are', () => { + // The annotation must be on the last non-empty line; earlier ones are ignored. 
+ assert.strictEqual( + extractSourceMappingURL( + '//# sourceMappingURL=foo.js.map\nconst x = 1;\n', + ), + undefined, + ); + }); + + it('returns undefined when comment contains a double-quote', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL="foo.js.map"\n'), + undefined, + ); + }); + + it('returns undefined when comment contains a single-quote', () => { + assert.strictEqual( + extractSourceMappingURL("//# sourceMappingURL='foo.js.map'\n"), + undefined, + ); + }); + + it('returns undefined when comment contains a backtick', () => { + assert.strictEqual( + extractSourceMappingURL('//# sourceMappingURL=`foo.js.map`\n'), + undefined, + ); + }); + + it('returns undefined for empty content', () => { + assert.strictEqual(extractSourceMappingURL(''), undefined); + }); + + it('returns undefined for whitespace-only content', () => { + assert.strictEqual(extractSourceMappingURL(' \n\n \n'), undefined); + }); + + it('handles all line terminator variants', () => { + assert.strictEqual( + extractSourceMappingURL('x\r//# sourceMappingURL=a.map'), + 'a.map', + ); + assert.strictEqual( + extractSourceMappingURL('x\r\n//# sourceMappingURL=b.map'), + 'b.map', + ); + assert.strictEqual( + extractSourceMappingURL('x\u2028//# sourceMappingURL=c.map'), + 'c.map', + ); + assert.strictEqual( + extractSourceMappingURL('x\u2029//# sourceMappingURL=d.map'), + 'd.map', + ); + }); + + it('returns a data: URL for inline source maps', () => { + const map = Buffer.from('{"mappings":""}').toString('base64'); + const url = `data:application/json;base64,${map}`; + assert.strictEqual( + extractSourceMappingURL(`//# sourceMappingURL=${url}\n`), + url, + ); + }); +}); + +describe('readSourceMappingURL', () => { + let tmpDir: string; + + before(() => { + tmp.setGracefulCleanup(); + tmpDir = tmp.dirSync().name; + }); + + function write(name: string, content: string): string { + const p = path.join(tmpDir, name); + fs.writeFileSync(p, content, 'utf8'); + return p; + } + + 
// Build a fake base64 payload larger than ANNOTATION_TAIL_BYTES to force + // the "last non-empty line extends before the tail window" scenario. + const LARGE_BASE64 = 'A'.repeat(ANNOTATION_TAIL_BYTES + 128); + const LARGE_ANNOTATION = `//# sourceMappingURL=data:application/json;base64,${LARGE_BASE64}`; + + it('reads external URL from a small file (fits entirely in tail)', async () => { + const p = write('ext-small.js', '//# sourceMappingURL=ext-small.js.map\n'); + assert.strictEqual(await readSourceMappingURL(p), 'ext-small.js.map'); + }); + + it('reads inline data: URL from a small file (fits entirely in tail)', async () => { + const map = Buffer.from('{"mappings":""}').toString('base64'); + const url = `data:application/json;base64,${map}`; + const p = write('inline-small.js', `//# sourceMappingURL=${url}\n`); + assert.strictEqual(await readSourceMappingURL(p), url); + }); + + it('returns undefined for a small file with no annotation', async () => { + const p = write('no-annotation.js', 'const x = 1;\n'); + assert.strictEqual(await readSourceMappingURL(p), undefined); + }); + + it('reads external URL from a large file (last line short, captured in tail)', async () => { + // Pad the file so the total size exceeds ANNOTATION_TAIL_BYTES, but keep + // the annotation line itself short so it fits within the tail. + const padding = '//' + ' '.repeat(ANNOTATION_TAIL_BYTES) + '\n'; + const p = write( + 'ext-large.js', + padding + '//# sourceMappingURL=ext-large.js.map\n', + ); + assert.strictEqual(await readSourceMappingURL(p), 'ext-large.js.map'); + }); + + it('reads large inline data: URL — no trailing newline (full-file fallback)', async () => { + // The annotation line is longer than ANNOTATION_TAIL_BYTES with no + // trailing newline, so the tail contains no line terminator → fallback. 
+ const p = write('inline-large-no-nl.js', LARGE_ANNOTATION); + assert.strictEqual( + await readSourceMappingURL(p), + `data:application/json;base64,${LARGE_BASE64}`, + ); + }); + + it('reads large inline data: URL — single trailing newline (full-file fallback)', async () => { + // tail = "...base64...\n" → lastNonEmptyIdx === 0 → fallback. + const p = write('inline-large-one-nl.js', LARGE_ANNOTATION + '\n'); + assert.strictEqual( + await readSourceMappingURL(p), + `data:application/json;base64,${LARGE_BASE64}`, + ); + }); + + it('reads large inline data: URL — multiple trailing empty lines (full-file fallback)', async () => { + // The bug case: tail = "...base64...\n\n\n" has line terminators but + // lastNonEmptyIdx === 0, so we must not use the tail alone. + const p = write('inline-large-multi-nl.js', LARGE_ANNOTATION + '\n\n\n'); + assert.strictEqual( + await readSourceMappingURL(p), + `data:application/json;base64,${LARGE_BASE64}`, + ); + }); + + it('returns undefined for a large file with no annotation', async () => { + const padding = 'x'.repeat(ANNOTATION_TAIL_BYTES + 1) + '\n'; + const p = write('large-no-annotation.js', padding + 'const x = 1;\n'); + assert.strictEqual(await readSourceMappingURL(p), undefined); + }); + + it('returns undefined for an empty file', async () => { + const p = write('empty.js', ''); + assert.strictEqual(await readSourceMappingURL(p), undefined); + }); +}); + +describe('SourceMapper.loadDirectory', () => { + let tmpDir: string; + + before(() => { + tmp.setGracefulCleanup(); + tmpDir = tmp.dirSync().name; + }); + + function write(name: string, content: string): string { + const p = path.join(tmpDir, name); + fs.writeFileSync(p, content, 'utf8'); + return p; + } + + // A minimal valid source map for test.js -> test.ts + const MAP_CONTENT = JSON.stringify({ + version: 3, + file: 'test.js', + sources: ['test.ts'], + names: [], + mappings: 'AAAA', + }); + + it('falls back to .map file when sourceMappingURL points to a non-existent file', async () => { + // 
The annotation references a file that doesn't exist; Phase 2 should + // find and load the conventional test.js.map instead. + write('test.js', '//# sourceMappingURL=nonexistent.js.map\n'); + write('test.js.map', MAP_CONTENT); + + const sm = new SourceMapper(); + await sm.loadDirectory(tmpDir); + + assert.ok( + sm.hasMappingInfo(path.join(tmpDir, 'test.js')), + 'expected mapping to be loaded via .map file fallback', + ); + }); + + it('loads no mapping when sourceMappingURL points to a non-existent file and there is no .map fallback', async () => { + write('orphan.js', '//# sourceMappingURL=nonexistent.js.map\n'); + // No orphan.js.map written — nothing to fall back to. + + const sm = new SourceMapper(); + await sm.loadDirectory(tmpDir); + + assert.ok( + !sm.hasMappingInfo(path.join(tmpDir, 'orphan.js')), + 'expected no mapping to be loaded', + ); + }); +});