diff --git a/package.json b/package.json
index 03411e81..58327d28 100644
--- a/package.json
+++ b/package.json
@@ -77,6 +77,7 @@
     "celaria-formats": "^1.0.2",
     "chess.js": "^1.4.0",
     "confbox": "^0.2.4",
+    "dom-to-svg": "^0.12.2",
     "imagetracer": "^0.2.2",
     "js-synthesizer": "^1.11.0",
     "json5": "^2.2.3",
diff --git a/src/handlers/htmlToSvg.ts b/src/handlers/htmlToSvg.ts
new file mode 100644
index 00000000..c54ab540
--- /dev/null
+++ b/src/handlers/htmlToSvg.ts
@@ -0,0 +1,197 @@
+import { elementToSVG, inlineResources } from "dom-to-svg";
+import CommonFormats, { Category } from "src/CommonFormats.ts";
+import type { FileData, FileFormat, FormatHandler } from "../FormatHandler.ts";
+
+/** Resolves once two consecutive animation frames have run. */
+function nextPaint(): Promise<void> {
+  return new Promise<void>(resolve => {
+    requestAnimationFrame(() => {
+      requestAnimationFrame(() => resolve());
+    });
+  });
+}
+
+/**
+ * Waits until every <img> and <video> under `root` has loaded or errored,
+ * then waits two more animation frames.
+ */
+async function waitForRenderableAssets(root: ParentNode): Promise<void> {
+  const pendingImages = Array.from(root.querySelectorAll("img"))
+    .filter(image => !image.complete)
+    .map(image => new Promise<void>(resolve => {
+      image.addEventListener("load", () => resolve(), { once: true });
+      image.addEventListener("error", () => resolve(), { once: true });
+    }));
+
+  const pendingVideos = Array.from(root.querySelectorAll("video"))
+    .filter(video => video.readyState < 2)
+    .map(video => new Promise<void>(resolve => {
+      video.addEventListener("loadeddata", () => resolve(), { once: true });
+      video.addEventListener("error", () => resolve(), { once: true });
+    }));
+
+  await Promise.all([...pendingImages, ...pendingVideos]);
+  await nextPaint();
+}
+
+/** Optional overrides for the rendered size and background of the output. */
+type HtmlToSvgOptions = {
+  width?: number;
+  height?: number;
+  backgroundColor?: string;
+};
+
+/**
+ * Picks the pixel size to render `element` at: an explicit option wins,
+ * otherwise the largest of the bounding-box, scroll and client sizes.
+ * Each dimension is rounded up and clamped to at least 1.
+ */
+function measureRenderedElement(
+  element: Element,
+  options: HtmlToSvgOptions,
+): { width: number; height: number } {
+  const rect = element.getBoundingClientRect();
+  const widthCandidate = element instanceof HTMLElement || element instanceof SVGElement
+    ? Math.max(rect.width, element.scrollWidth || 0, element.clientWidth || 0)
+    : rect.width;
+  const heightCandidate = element instanceof HTMLElement || element instanceof SVGElement
+    ? Math.max(rect.height, element.scrollHeight || 0, element.clientHeight || 0)
+    : rect.height;
+
+  return {
+    width: Math.max(1, Math.ceil(options.width ?? widthCandidate)),
+    height: Math.max(1, Math.ceil(options.height ?? heightCandidate)),
+  };
+}
+
+/**
+ * Pins `root` to its measured size, converts it with dom-to-svg, inlines
+ * its resources and returns the serialized SVG document.
+ */
+async function renderRootToSvgString(
+  root: HTMLElement,
+  options: HtmlToSvgOptions,
+): Promise<string> {
+  await waitForRenderableAssets(root);
+
+  const { width, height } = measureRenderedElement(root, options);
+  // The inline style may not end with ";". Terminate it so the declarations
+  // appended below are not glued onto its last value.
+  const inheritedStyle = (root.getAttribute("style") ?? "").trim();
+  const existingStyle = inheritedStyle === "" || inheritedStyle.endsWith(";")
+    ? inheritedStyle
+    : `${inheritedStyle};`;
+  const bg = options.backgroundColor ? `background-color:${options.backgroundColor};` : "";
+  root.setAttribute(
+    "style",
+    `${existingStyle}${bg}width:${width}px;height:${height}px;box-sizing:border-box;`,
+  );
+
+  // Let the browser lay out the resized root before reading its bounds.
+  await nextPaint();
+
+  const bounds = root.getBoundingClientRect();
+  const svgDocument = elementToSVG(root, { captureArea: bounds });
+  await inlineResources(svgDocument.documentElement);
+  return new XMLSerializer().serializeToString(svgDocument);
+}
+
+/**
+ * Renders an HTML string off-screen inside a closed shadow root and returns
+ * it as an SVG string. The temporary host element is always removed.
+ */
+async function htmlContentToSvgString(
+  htmlContent: string,
+  options: HtmlToSvgOptions = {},
+): Promise<string> {
+  const parsed = new DOMParser().parseFromString(htmlContent, "text/html");
+  const host = document.createElement("div");
+  host.style.all = "initial";
+  host.style.position = "fixed";
+  host.style.left = "-20000px";
+  host.style.top = "0";
+  host.style.pointerEvents = "none";
+  host.style.background = "transparent";
+  document.body.appendChild(host);
+
+  try {
+    const shadow = host.attachShadow({ mode: "closed" });
+
+    // Carry the document's <style> elements over into the shadow root.
+    for (const styleElement of Array.from(parsed.querySelectorAll("style"))) {
+      shadow.appendChild(styleElement.cloneNode(true));
+    }
+
+    const root = document.createElement("div");
+    const bodyStyle = parsed.body.getAttribute("style");
+    if (bodyStyle) root.setAttribute("style", bodyStyle);
+
+    const sourceNodes = parsed.body.childNodes.length > 0
+      ? Array.from(parsed.body.childNodes)
+      : Array.from(parsed.documentElement.childNodes);
+    for (const childNode of sourceNodes) {
+      root.appendChild(childNode.cloneNode(true));
+    }
+
+    shadow.appendChild(root);
+
+    return await renderRootToSvgString(root, options);
+  } finally {
+    host.remove();
+  }
+}
+
+/** Converts HTML files to SVG by rendering them and running dom-to-svg. */
+class HtmlToSvgHandler implements FormatHandler {
+
+  public name: string = "dom-to-svg";
+
+  public supportedFormats: FileFormat[] = [
+    CommonFormats.HTML.supported("html", true, false),
+    CommonFormats.SVG.supported("svg", false, true, false, {
+      category: [Category.IMAGE, Category.VECTOR],
+    })
+  ];
+
+  public ready: boolean = true;
+
+  async init () {
+    this.ready = true;
+  }
+
+  /**
+   * Converts each HTML input to an SVG file, one at a time.
+   * Throws if the requested formats are not html -> svg.
+   */
+  async doConvert (
+    inputFiles: FileData[],
+    inputFormat: FileFormat,
+    outputFormat: FileFormat,
+  ): Promise<FileData[]> {
+
+    if (inputFormat.internal !== "html") throw "Invalid input format.";
+    if (outputFormat.internal !== "svg") throw "Invalid output format.";
+
+    const outputFiles: FileData[] = [];
+
+    const encoder = new TextEncoder();
+    const decoder = new TextDecoder();
+
+    for (const inputFile of inputFiles) {
+      const { name, bytes } = inputFile;
+      const htmlStr = decoder.decode(bytes);
+      const svgStr = await htmlContentToSvgString(htmlStr);
+      const newName = (name.endsWith(".html") ? name.slice(0, -5) : name) + ".svg";
+      outputFiles.push({
+        name: newName,
+        bytes: encoder.encode(svgStr),
+      });
+    }
+
+    return outputFiles;
+
+  }
+
+}
+
+export default HtmlToSvgHandler;
diff --git a/src/handlers/index.ts b/src/handlers/index.ts
index 0c6fae4b..30243202 100644
--- a/src/handlers/index.ts
+++ b/src/handlers/index.ts
@@ -13,7 +13,7 @@ import svgTraceHandler from "./svgTrace.ts";
 import { renameZipHandler, renameTxtHandler, renameJsonHandler } from "./rename.ts";
 import envelopeHandler from "./envelope.ts";
 import pandocHandler from "./pandoc.ts";
-import svgForeignObjectHandler from "./svgForeignObject.ts";
+import htmlToSvgHandler from "./htmlToSvg.ts";
 import qoiFuHandler from "./qoi-fu.ts";
 import sppdHandler from "./sppd.ts";
 import threejsHandler from "./threejs.ts";
@@ -88,7 +88,7 @@ try { handlers.push(renameZipHandler) } catch (_) { };
 try { handlers.push(renameTxtHandler) } catch (_) { };
 try { handlers.push(renameJsonHandler) } catch (_) { };
 try { handlers.push(new envelopeHandler()) } catch (_) { };
-try { handlers.push(new svgForeignObjectHandler()) } catch (_) { };
+try { handlers.push(new htmlToSvgHandler()) } catch (_) { };
 try { handlers.push(new qoiFuHandler()) } catch (_) { };
 try { handlers.push(new sppdHandler()) } catch (_) { };
 try { handlers.push(new threejsHandler()) } catch (_) { };
diff --git a/src/handlers/svgForeignObject.ts b/src/handlers/svgForeignObject.ts
deleted file mode 100644
index 1ac21a08..00000000
--- a/src/handlers/svgForeignObject.ts
+++ /dev/null
@@ -1,106 +0,0 @@
-import CommonFormats from "src/CommonFormats.ts";
-import type { FileData, FileFormat, FormatHandler } from "../FormatHandler.ts";
-
-class svgForeignObjectHandler implements FormatHandler {
-
-  public name: string = "svgForeignObject";
-
-  public supportedFormats: FileFormat[] = [
-    CommonFormats.HTML.supported("html", true, false),
-    // Identical to the input HTML, just wrapped in an SVG foreignObject, so it's lossless
-    CommonFormats.SVG.supported("svg", false, true, true)
-  ];
-
-  public ready: boolean = true;
-
-  async init () {
-    this.ready = true;
-  }
-
-  static async normalizeHTML (html: string) {
-    // To get the size of the input document, we need the
-    // browser to actually render it.
-    // Create a hidden "dummy" element on the DOM.
-    const dummy = document.createElement("div");
-    dummy.style.all = "initial";
-    dummy.style.visibility = "hidden";
-    dummy.style.position = "fixed";
-    document.body.appendChild(dummy);
-
-    // Add a DOM shadow to the dummy to "sterilize" it.
-    const shadow = dummy.attachShadow({ mode: "closed" });
-    const style = document.createElement("style");
-    style.textContent = ":host>div{display:flow-root;}";
-    shadow.appendChild(style);
-
-    // Create a div within the shadow DOM to act as
-    // a container for our HTML payload.
-    const container = document.createElement("div");
-    container.innerHTML = html;
-    shadow.appendChild(container);
-
-    // Wait for all images to finish loading. This is required for layout
-    // changes, not because we actually care about the image contents.
-    const images = container.querySelectorAll("img, video");
-    const promises = Array.from(images).map(image => new Promise(resolve => {
-      image.addEventListener("load", resolve);
-      image.addEventListener("loadeddata", resolve);
-      image.addEventListener("error", resolve);
-    }));
-    await Promise.all(promises);
-
-    // Make sure the browser has had time to render.
-    // This is probably redundant due to the async calls above.
-    await new Promise(resolve => {
-      requestAnimationFrame(() => {
-        requestAnimationFrame(resolve);
-      });
-    });
-
-    // Finally, get the bounding box of the input and serialize it to XML.
-    const bbox = container.getBoundingClientRect();
-    const serializer = new XMLSerializer();
-    const xml = serializer.serializeToString(container);
-
-    container.remove();
-    dummy.remove();
-
-    return { xml, bbox };
-  }
-
-  async doConvert (
-    inputFiles: FileData[],
-    inputFormat: FileFormat,
-    outputFormat: FileFormat
-  ): Promise<FileData[]> {
-
-    if (inputFormat.internal !== "html") throw "Invalid input format.";
-    if (outputFormat.internal !== "svg") throw "Invalid output format.";
-
-    const outputFiles: FileData[] = [];
-
-    const encoder = new TextEncoder();
-    const decoder = new TextDecoder();
-
-    for (const inputFile of inputFiles) {
-      const { name, bytes } = inputFile;
-      const html = decoder.decode(bytes);
-      const { xml, bbox } = await svgForeignObjectHandler.normalizeHTML(html);
-      const svg = (
-        `<svg width="${bbox.width}" height="${bbox.height}" xmlns="http://www.w3.org/2000/svg">
-        <foreignObject x="0" y="0" width="${bbox.width}" height="${bbox.height}">
-        ${xml}
-        </foreignObject>
-        </svg>`);
-      const outputBytes = encoder.encode(svg);
-      const newName = (name.endsWith(".html") ? name.slice(0, -5) : name) + ".svg";
-      outputFiles.push({ name: newName, bytes: outputBytes });
-    }
-
-    return outputFiles;
-
-  }
-
-}
-
-export default svgForeignObjectHandler;
diff --git a/src/handlers/typst.ts b/src/handlers/typst.ts
index df610bfc..245fb468 100644
--- a/src/handlers/typst.ts
+++ b/src/handlers/typst.ts
@@ -1,7 +1,36 @@
-import CommonFormats from "src/CommonFormats.ts";
+import CommonFormats, { Category } from "src/CommonFormats.ts";
 import type { FileData, FileFormat, FormatHandler } from "../FormatHandler.ts";
 import type { TypstSnippet } from "@myriaddreamin/typst.ts/dist/esm/contrib/snippet.mjs";
 
+/**
+ * Reads the page size from the root <svg> tag within the first 16 KiB of an
+ * SVG file. Uses width/height when both are plain numbers (optionally with a
+ * px or pt suffix), else the viewBox size, else 960 x 540. Values are used
+ * as points without unit conversion.
+ */
+function parseSvgPageDimensions(svgBytes: Uint8Array): { widthPt: number; heightPt: number } {
+  const head = new TextDecoder().decode(svgBytes.slice(0, 16384));
+  // Only inspect the root tag: matching anywhere would also pick up child
+  // attributes such as stroke-width="2". Falls back to the whole head.
+  const rootTag = head.match(/<svg\b[^>]*>/i)?.[0] ?? head;
+  const wAttr = rootTag.match(/\swidth=["']([\d.]+)\s*(?:px|pt)?["']/i);
+  const hAttr = rootTag.match(/\sheight=["']([\d.]+)\s*(?:px|pt)?["']/i);
+  const vb = rootTag.match(/\sviewBox=["']\s*-?[\d.]+[\s,]+-?[\d.]+[\s,]+([\d.]+)[\s,]+([\d.]+)\s*["']/i);
+  let w = 960;
+  let h = 540;
+  if (wAttr && hAttr) {
+    w = Number.parseFloat(wAttr[1]);
+    h = Number.parseFloat(hAttr[1]);
+  } else if (vb) {
+    w = Number.parseFloat(vb[1]);
+    h = Number.parseFloat(vb[2]);
+  }
+  return {
+    widthPt: Math.max(1, Number.isFinite(w) ? w : 960),
+    heightPt: Math.max(1, Number.isFinite(h) ? h : 540),
+  };
+}
+
 class TypstHandler implements FormatHandler {
   public name: string = "typst";
   public ready: boolean = false;
@@ -9,7 +38,9 @@ class TypstHandler implements FormatHandler {
   public supportedFormats: FileFormat[] = [
     CommonFormats.TYPST.supported("typst", true, false, true),
     CommonFormats.PDF.supported("pdf", false, true),
-    CommonFormats.SVG.supported("svg", false, true),
+    CommonFormats.SVG.supported("svg", true, true, false, {
+      category: [Category.IMAGE, Category.VECTOR, Category.DOCUMENT],
+    }),
   ];
 
   private $typst?: TypstSnippet;
@@ -32,13 +63,77 @@ class TypstHandler implements FormatHandler {
     this.ready = true;
   }
 
+  /**
+   * Compiles the given SVG files into a single PDF with one SVG per page.
+   * Every page uses the dimensions parsed from the first file, and the
+   * output is named after the first file. Returns [] for empty input.
+   */
+  private async svgFilesToSinglePdf(inputFiles: FileData[]): Promise<FileData[]> {
+    // Nothing to compile; also avoids reading inputFiles[0] below.
+    if (inputFiles.length === 0) return [];
+
+    const $typst = this.$typst!;
+    const { widthPt, heightPt } = parseSvgPageDimensions(inputFiles[0].bytes);
+
+    const id = `s${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 11)}`;
+    const shadowPaths: string[] = [];
+    const imageBasenames: string[] = [];
+
+    // NOTE(review): files are mapped under /tmp but referenced by basename;
+    // presumably the main document also resolves under /tmp. Confirm.
+    for (let i = 0; i < inputFiles.length; i++) {
+      const basename = `${id}_${i}.svg`;
+      const absPath = `/tmp/${basename}`;
+      await $typst.mapShadow(absPath, inputFiles[i].bytes);
+      shadowPaths.push(absPath);
+      imageBasenames.push(basename);
+    }
+
+    const body = imageBasenames
+      .map((basename, i) => {
+        const page = `#box(width: 100%, height: 100%)[#image("${basename}", width: 100%, height: 100%)]`;
+        return i < imageBasenames.length - 1 ? `${page}\n#pagebreak()\n` : page;
+      })
+      .join("\n");
+
+    const mainContent = `#set page(margin: 0pt, width: ${widthPt}pt, height: ${heightPt}pt)
+${body}
+`;
+
+    try {
+      const pdfData = await $typst.pdf({ mainContent });
+      if (!pdfData) throw new Error("Typst compilation to PDF failed.");
+      const baseName = inputFiles[0].name.replace(/\.[^.]+$/u, "");
+      return [{
+        name: `${baseName}.pdf`,
+        bytes: new Uint8Array(pdfData),
+      }];
+    } finally {
+      for (const p of shadowPaths) {
+        await $typst.unmapShadow(p);
+      }
+      await $typst.resetShadow();
+    }
+  }
+
   async doConvert(
     inputFiles: FileData[],
-    _inputFormat: FileFormat,
+    inputFormat: FileFormat,
     outputFormat: FileFormat,
   ): Promise<FileData[]> {
     if (!this.ready || !this.$typst) throw new Error("Handler not initialized.");
 
+    if (inputFormat.internal === "svg" && outputFormat.internal === "svg") {
+      return inputFiles.map(f => ({
+        name: f.name,
+        bytes: f.bytes.slice(),
+      }));
+    }
+
+    if (inputFormat.internal === "svg" && outputFormat.internal === "pdf") {
+      return this.svgFilesToSinglePdf(inputFiles);
+    }
+
     const outputFiles: FileData[] = [];
 
     for (const file of inputFiles) {
@@ -66,4 +161,3 @@ class TypstHandler implements FormatHandler {
 }
 
 export default TypstHandler;
-
diff --git a/test/commonFormats.test.ts b/test/commonFormats.test.ts
index bdf2cdb3..d9531a68 100644
--- a/test/commonFormats.test.ts
+++ b/test/commonFormats.test.ts
@@ -158,7 +158,7 @@ test("mp3 → png → gif", async () => {
 
 }, { timeout: 60000 });
 
-test("docx → html → svg → png → pdf", async () => {
+test("docx → html → svg → pdf", async () => {
 
   const conversion = await attemptConversion(
     ["word.docx"],
@@ -169,10 +169,10 @@ test("docx → html → svg → png → pdf", async () => {
   expect(conversion).toBeTruthy();
   expect(conversion!.path.map(c => c.format.mime)).toEqual([
     "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-    "text/html", "image/svg+xml", "image/png", "application/pdf"
+    "text/html", "image/svg+xml", "application/pdf"
   ]);
   const fileSize = Object.values(conversion!.files[0].bytes).length;
-  expect(fileSize).toBeWithin(55000, 65000);
+  expect(fileSize).toBeWithin(50000, 85000);
 
 }, { timeout: 60000 });
 