Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions .github/workflows/render-video.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
name: Render Promo Video

# Render whenever video sources change, on PRs into main, or on demand.
on:
  push:
    branches: [main, feat/video, feat/image-handling]
    paths:
      - "video/**"
  pull_request:
    branches: [main]
    paths:
      - "video/**"
  workflow_dispatch:
    inputs:
      codec:
        description: "Output codec"
        required: false
        default: "h264"
        type: choice
        options:
          - h264
          - h265
          - vp8
          - vp9
      crf:
        description: "Constant Rate Factor (quality, lower = better)"
        required: false
        default: "18"

jobs:
  render:
    runs-on: ubuntu-latest
    # Rendering is bounded; kill runaway Chromium/ffmpeg after 20 minutes.
    timeout-minutes: 20

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 20
          # Cache npm downloads keyed on the video package lockfile.
          cache: npm
          cache-dependency-path: video/package-lock.json

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
chromium-browser \
libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 \
libxcomposite1 libxdamage1 libxrandr2 libgbm1 \
libpango-1.0-0 libcairo2 libxshmfence1 \
fonts-noto fonts-liberation ffmpeg
sudo apt-get install -y libasound2t64 || sudo apt-get install -y libasound2
echo "BROWSER_PATH=$(which chromium-browser || which chromium || which google-chrome-stable || which google-chrome)" >> $GITHUB_ENV
Copy link
Copy Markdown

@cubic-dev-ai cubic-dev-ai Bot Mar 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Validate BROWSER_PATH before exporting it; otherwise the render step can fail with an empty browser executable path.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At .github/workflows/render-video.yml, line 55:

<comment>Validate `BROWSER_PATH` before exporting it; otherwise the render step can fail with an empty browser executable path.</comment>

<file context>
@@ -0,0 +1,85 @@
+          # libasound2 renamed to libasound2t64 on Ubuntu 24.04+
+          sudo apt-get install -y libasound2t64 || sudo apt-get install -y libasound2
+          # Find Chrome/Chromium path for Remotion
+          echo "BROWSER_PATH=$(which chromium-browser || which chromium || which google-chrome-stable || which google-chrome)" >> $GITHUB_ENV
+
+      - name: Install dependencies
</file context>
Fix with Cubic


- name: Install dependencies
working-directory: video
run: npm ci

- name: Cache Remotion bundle
uses: actions/cache@v4
with:
path: video/node_modules/.cache/remotion
key: remotion-bundle-${{ hashFiles('video/src/**') }}
restore-keys: remotion-bundle-

- name: Render video
working-directory: video
run: |
npx remotion render src/index.ts TreeDexVideo out/treedex.mp4 \
--browser-executable="$BROWSER_PATH" \
--codec=${{ github.event.inputs.codec || 'h264' }} \
--crf=${{ github.event.inputs.crf || '18' }} \
--concurrency=4 \
--image-format=jpeg \
--quality=85 \
--bundle-cache=true \
--log=verbose

- name: Upload rendered video
uses: actions/upload-artifact@v4
with:
name: treedex-promo-video
path: video/out/treedex.mp4
retention-days: 30

- name: Print video info
working-directory: video
run: |
ls -lh out/treedex.mp4
ffprobe -v quiet -print_format json -show_format -show_streams out/treedex.mp4 | head -50
55 changes: 53 additions & 2 deletions src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,53 @@ import {
structureContinuePrompt,
retrievalPrompt,
answerPrompt,
imageDescriptionPrompt,
} from "./prompts.js";
import { countTokens } from "./pdf-parser.js";
import type { Page, TreeNode, IndexData, Stats } from "./types.js";
import type { BaseLLM } from "./llm-backends.js";

/**
 * Append image descriptions to page text, modifying pages in place.
 *
 * For each image on a page: prefer its alt text; otherwise, if the LLM
 * supports vision and raw data is present, ask the model for a description
 * (falling back to a generic marker on failure); otherwise emit the generic
 * marker. Pages gain the markers as appended lines and their token counts
 * are recomputed. Descriptions are requested sequentially, one image at a time.
 */
async function describeImages(
  pages: Page[],
  llm?: BaseLLM | null,
  verbose: boolean = false,
): Promise<void> {
  for (const page of pages) {
    const images = page.images ?? [];
    if (images.length === 0) continue;

    const markers: string[] = [];
    for (const img of images) {
      const alt = (img.alt_text ?? "").trim();
      if (alt) {
        markers.push(`[Image: ${alt}]`);
        continue;
      }
      if (llm?.supportsVision && img.data) {
        try {
          const described = await llm.generateWithImage(
            imageDescriptionPrompt(),
            img.data,
            img.mime_type,
          );
          markers.push(`[Image: ${described.trim()}]`);
          continue;
        } catch {
          // Vision call failed — fall through to the generic marker.
        }
      }
      markers.push("[Image present]");
    }

    if (markers.length > 0) {
      page.text = page.text + "\n" + markers.join("\n");
      // Keep token accounting in sync with the enlarged text.
      page.token_count = countTokens(page.text);
      if (verbose) {
        console.log(` Page ${page.page_num}: ${markers.length} image(s) described`);
      }
    }
  }
}

/** Result of a TreeDex query. */
export class QueryResult {
readonly context: string;
Expand Down Expand Up @@ -100,13 +143,15 @@ export class TreeDex {
maxTokens?: number;
overlap?: number;
verbose?: boolean;
extractImages?: boolean;
},
): Promise<TreeDex> {
const {
loader,
maxTokens = 20000,
overlap = 1,
verbose = true,
extractImages = false,
} = options ?? {};

if (verbose) {
Expand All @@ -118,7 +163,7 @@ export class TreeDex {
if (loader) {
pages = await loader.load(path);
} else {
pages = await autoLoader(path);
pages = await autoLoader(path, { extractImages });
}

if (verbose) {
Expand All @@ -141,6 +186,9 @@ export class TreeDex {
): Promise<TreeDex> {
const { maxTokens = 20000, overlap = 1, verbose = true } = options ?? {};

// Describe images before grouping — appends text markers to pages
await describeImages(pages, llm, verbose);

const groups = groupPages(pages, maxTokens, overlap);

if (verbose) {
Expand Down Expand Up @@ -288,11 +336,14 @@ export class TreeDex {
const fs = await import("node:fs/promises");
const stripped = stripTextFromTree(this.tree);

// Strip images from pages — descriptions are already in text
const cleanPages: Page[] = this.pages.map(({ images: _images, ...rest }) => rest);

const data: IndexData = {
version: "1.0",
framework: "TreeDex",
tree: stripped,
pages: this.pages,
pages: cleanPages,
};

await fs.writeFile(path, JSON.stringify(data, null, 2), "utf-8");
Expand Down
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,6 @@ export {
structureContinuePrompt,
retrievalPrompt,
answerPrompt,
imageDescriptionPrompt,
} from "./prompts.js";
export type { Page, TreeNode, IndexData, Stats } from "./types.js";
export type { Page, PageImage, TreeNode, IndexData, Stats } from "./types.js";
106 changes: 105 additions & 1 deletion src/llm-backends.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,22 @@
export abstract class BaseLLM {
abstract generate(prompt: string): Promise<string>;

  /**
   * Whether this backend supports image inputs.
   * Subclasses override this to opt into `generateWithImage()`.
   */
  get supportsVision(): boolean {
    return false;
  }

  /**
   * Send a prompt with an image and return the generated text.
   *
   * Default implementation for non-vision backends: always throws.
   * Callers should check `supportsVision` before invoking.
   *
   * @param _prompt - Instruction to accompany the image (unused here).
   * @param _imageBase64 - Base64-encoded image payload (unused here).
   * @param _mimeType - Image MIME type, e.g. "image/png" (unused here).
   * @throws Error always, naming the concrete backend class.
   */
  async generateWithImage(
    _prompt: string,
    _imageBase64: string,
    _mimeType: string,
  ): Promise<string> {
    throw new Error(
      `${this.constructor.name} does not support vision/image inputs.`,
    );
  }

  /** Human-readable backend descriptor (the concrete class name). */
  toString(): string {
    return `${this.constructor.name}()`;
  }
Expand Down Expand Up @@ -65,11 +81,28 @@ export class GeminiLLM extends BaseLLM {
}

async generate(prompt: string): Promise<string> {
const model = await this.getClient() as { generateContent(p: string): Promise<{ response: { text(): string } }> };
const model = await this.getClient() as { generateContent(p: unknown): Promise<{ response: { text(): string } }> };
const response = await model.generateContent(prompt);
return response.response.text();
}

  // NOTE(review): hard-coded to true regardless of the configured model;
  // a text-only `modelName` would still attempt (and fail) image calls.
  // Consider deriving this from the model name or a constructor option.
  get supportsVision(): boolean {
    return true;
  }
Comment on lines +89 to +91
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# First, let's examine the src/llm-backends.ts file to understand the structure and find the relevant lines
wc -l src/llm-backends.ts

Repository: mithun50/TreeDex

Length of output: 83


🏁 Script executed:

# Read the entire file to understand the implementation
cat -n src/llm-backends.ts

Repository: mithun50/TreeDex

Length of output: 25884


🏁 Script executed:

# Also check src/core.ts to understand how supportsVision is used
cat -n src/core.ts

Repository: mithun50/TreeDex

Length of output: 13819


supportsVision should be model-aware, not hard-coded by backend class.

These getters always return true, even when callers override modelName with a text-only model. While generateWithImage() errors are caught with graceful fallback in src/core.ts (line 55), this causes unnecessary API calls and errors for text-only model configurations. Please make this configurable per instance or derive it from the chosen model to avoid wasted requests.

Also applies to: 149-151, 225-227

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/llm-backends.ts` around lines 89 - 91, The supportsVision getter is
hard-coded to return true causing wasted image requests; change it to be
model-aware by making supportsVision read from instance state (e.g.,
this.modelName or this.opts.supportsVision) or derive it from a helper
isVisionModel(modelName). Update the getters at the three locations (the get
supportsVision() implementations around lines 89-91, 149-151, 225-227) to return
a boolean based on either an explicit constructor option (supportsVision) or on
a small isVisionModel(this.modelName) lookup, and add that option/lookup where
the backend classes are constructed so callers can override or the backend can
compute capability from the model name.


async generateWithImage(
prompt: string,
imageBase64: string,
mimeType: string,
): Promise<string> {
const model = await this.getClient() as { generateContent(p: unknown): Promise<{ response: { text(): string } }> };
const imagePart = {
inlineData: { mimeType, data: imageBase64 },
};
const response = await model.generateContent([prompt, imagePart]);
return response.response.text();
}

  /** Descriptor including the configured model name. */
  toString(): string {
    return `GeminiLLM(model=${JSON.stringify(this.modelName)})`;
  }
Expand Down Expand Up @@ -113,6 +146,40 @@ export class OpenAILLM extends BaseLLM {
return response.choices[0].message.content;
}

  // NOTE(review): hard-coded to true regardless of the configured model;
  // a text-only `modelName` would still attempt (and fail) image calls.
  // Consider deriving this from the model name or a constructor option.
  get supportsVision(): boolean {
    return true;
  }

async generateWithImage(
prompt: string,
imageBase64: string,
mimeType: string,
): Promise<string> {
const client = await this.getClient() as {
chat: {
completions: {
create(opts: unknown): Promise<{
choices: Array<{ message: { content: string } }>;
}>;
};
};
};
const response = await client.chat.completions.create({
model: this.modelName,
messages: [{
role: "user",
content: [
{ type: "text", text: prompt },
{
type: "image_url",
image_url: { url: `data:${mimeType};base64,${imageBase64}` },
},
],
}],
});
return response.choices[0].message.content;
}

  /** Descriptor including the configured model name. */
  toString(): string {
    return `OpenAILLM(model=${JSON.stringify(this.modelName)})`;
  }
Expand Down Expand Up @@ -155,6 +222,43 @@ export class ClaudeLLM extends BaseLLM {
return response.content[0].text;
}

  // NOTE(review): hard-coded to true regardless of the configured model;
  // a text-only `modelName` would still attempt (and fail) image calls.
  // Consider deriving this from the model name or a constructor option.
  get supportsVision(): boolean {
    return true;
  }

async generateWithImage(
prompt: string,
imageBase64: string,
mimeType: string,
): Promise<string> {
const client = await this.getClient() as {
messages: {
create(opts: unknown): Promise<{
content: Array<{ text: string }>;
}>;
};
};
const response = await client.messages.create({
model: this.modelName,
max_tokens: 4096,
messages: [{
role: "user",
content: [
{
type: "image",
source: {
type: "base64",
media_type: mimeType,
data: imageBase64,
},
},
{ type: "text", text: prompt },
],
}],
});
return response.content[0].text;
}

  /** Descriptor including the configured model name. */
  toString(): string {
    return `ClaudeLLM(model=${JSON.stringify(this.modelName)})`;
  }
Expand Down
Loading
Loading