Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/ipynb-attachment-fixes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"myst-to-ipynb": patch
"myst-cli": patch
---

ipynb attachment embedding: preserve optional image titles when rewriting to `attachment:` references, resolve images written to the export `files/` output folder (e.g. executed notebook outputs) that don't exist relative to the source file, and read image files asynchronously.
63 changes: 43 additions & 20 deletions packages/myst-cli/src/build/ipynb/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ export async function runIpynbExport(
if ((exportOptions as any).images === 'attachment') {
ipynbOpts.images = 'attachment';
// Collect image data from the AST — read files and base64-encode
ipynbOpts.imageData = collectImageData(session, mdast, article.file);
ipynbOpts.imageData = await collectImageData(
session,
mdast,
article.file,
path.dirname(output),
);
}
const mdOut = writeIpynb(vfile, mdast as any, frontmatter, ipynbOpts);
logMessagesFromVFile(session, mdOut);
Expand All @@ -77,47 +82,65 @@ export async function runIpynbExport(
* the serialized markdown to use `attachment:` references.
*
* Remote URLs (http/https) and data URIs are skipped — only local files are embedded.
*
* URLs are resolved against the source file's folder first; if not found
* there, against the export output folder — `finalizeMdast` rewrites
* generated images (e.g. executed notebook outputs) to `files/...` web
* paths under the output folder, so those only exist at the output side.
*/
// NOTE(review): this span is a diff rendering without +/- markers. The
// pre-change synchronous version (`function`, `for...of`, `continue`,
// `fs.existsSync` / `fs.readFileSync`) and the post-change async version
// (`async function`, `map` + `Promise.all`, `return`, `fs.promises.readFile`)
// are interleaved line by line below.
//
// Purpose (both versions): build a map from image URL to `{ mime, data }`,
// where `data` is the base64-encoded file content, for every local image
// node selected from `mdast`.
function collectImageData(
async function collectImageData(
session: ISession,
mdast: any,
sourceFile: string,
): Record<string, ImageData> {
outputFolder: string,
): Promise<Record<string, ImageData>> {
const imageData: Record<string, ImageData> = {};
const imageNodes = selectAll('image', mdast) as any[];
const sourcePath = session.sourcePath();
// URLs already claimed by a task. The has/add pair below runs before the
// task's first `await`, so two tasks started for the same URL cannot both
// go on to read the file.
const seen = new Set<string>();

for (const img of imageNodes) {
// Async version: one task per image node; all are started here and awaited
// together via `Promise.all` further down.
const tasks = imageNodes.map(async (img) => {
const url = img.url ?? img.urlSource;
// Skip nodes without a URL, remote (http/https) URLs, and data URIs.
if (
!url ||
url.startsWith('http://') ||
url.startsWith('https://') ||
url.startsWith('data:')
) {
continue;
return;
}
if (imageData[url]) continue; // already processed
if (seen.has(url)) return; // already processed
seen.add(url);

const sourceFolder = getSourceFolder(url, sourceFile, sourcePath);
// Strip leading slashes/backslashes from the URL before joining it onto a folder.
const relativeUrl = url.replace(/^[/\\]+/, '');
const filePath = path.join(sourceFolder, relativeUrl);
// Lookup order: the folder returned by `getSourceFolder` first, then the
// same relative path under `outputFolder`.
const candidates = [path.join(sourceFolder, relativeUrl), path.join(outputFolder, relativeUrl)];

try {
if (!fs.existsSync(filePath)) {
session.log.debug(`Image not found for attachment embedding: ${filePath}`);
continue;
for (const filePath of candidates) {
try {
const buffer = await fs.promises.readFile(filePath);
// Use a generic binary type when `mime.lookup` returns a falsy value.
const mimeType = (mime.lookup(filePath) || 'application/octet-stream') as string;
imageData[url] = {
mime: mimeType,
data: buffer.toString('base64'),
};
// First readable candidate wins; the remaining candidates are not tried.
return;
} catch (err) {
// Missing files fall through to the next candidate quietly;
// other failures (EACCES, EISDIR, ...) are surfaced. Embedding
// stays best-effort either way — the image is left as a path.
if ((err as NodeJS.ErrnoException).code !== 'ENOENT') {
session.log.warn(
`Could not read image for attachment embedding: ${filePath} (${(err as NodeJS.ErrnoException).code})`,
);
}
}
const buffer = fs.readFileSync(filePath);
const mimeType = (mime.lookup(filePath) || 'application/octet-stream') as string;
imageData[url] = {
mime: mimeType,
data: buffer.toString('base64'),
};
} catch (err) {
session.log.debug(`Failed to read image for attachment: ${filePath}`);
}
}
// Async version: reached only when no candidate was read successfully,
// because every successful read returns from the task above.
session.log.debug(
`Image not found for attachment embedding: ${url} (tried: ${candidates.join(', ')})`,
);
});
// Wait for every task to settle; file-read errors are caught inside each task.
await Promise.all(tasks);

return imageData;
}
7 changes: 4 additions & 3 deletions packages/myst-to-ipynb/src/attachments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ export function embedImagesAsAttachments(
// Handles escaped brackets in alt text and escaped parentheses in URLs.
// The escaped sequences (\] and \)) must appear BEFORE the single-char
// alternatives so the regex engine matches them as pairs first.
const imgRegex = /!\[((?:\\\]|[^\]])*)\]\(((?:\\\)|[^)\s])+)(?:\s+"[^"]*")?\)/g;
const imgRegex = /!\[((?:\\\]|[^\]])*)\]\(((?:\\\)|[^)\s])+)(\s+"[^"]*")?\)/g;

const updatedMd = md.replace(imgRegex, (fullMatch, alt, url) => {
const updatedMd = md.replace(imgRegex, (fullMatch, alt, url, title) => {
// Unescape markdown characters that mdast-util-to-markdown might have added
const unescapedUrl = url.replace(/\\([()[\]])/g, '$1');

Expand All @@ -87,7 +87,8 @@ export function embedImagesAsAttachments(
usedNames.add(name);

attachments[name] = { [data.mime]: data.data };
return `![${alt}](attachment:${name})`;
// Preserve an optional image title: ![alt](url "title")
return `![${alt}](attachment:${name}${title ?? ''})`;
});

if (Object.keys(attachments).length > 0) {
Expand Down
12 changes: 12 additions & 0 deletions packages/myst-to-ipynb/tests/attachments.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ describe('embedImagesAsAttachments', () => {
});
});

test('preserves an optional image title', () => {
  // A quoted title following the URL must survive the rewrite to an
  // `attachment:` reference, and the image must still be registered.
  const source = '![Chart](/_static/chart.png "My chart title")';
  const knownImages = {
    '/_static/chart.png': { mime: 'image/png', data: 'AAAA' },
  };

  const { md: rewritten, attachments } = embedImagesAsAttachments(source, knownImages);

  expect(rewritten).toBe('![Chart](attachment:chart.png "My chart title")');
  expect(attachments).toEqual({
    'chart.png': { 'image/png': 'AAAA' },
  });
});

test('skips images not in imageData', () => {
const md = '![A](/a.png)\n\n![B](/b.png)';
const imageData = {
Expand Down