Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion src/main/constants/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ export const GEMINI_MODEL_2_5_FLASH = 'gemini-2.5-flash'
export const GEMINI_MODEL_2_5_FLASH_LITE = 'gemini-2.5-flash-lite'
export const GEMINI_MODEL_3_FLASH_PREVIEW = 'gemini-3-flash-preview'
export const GEMINI_MODEL_3_1_FLASH_IMAGE_PREVIEW = 'gemini-3.1-flash-image-preview'
export const GEMINI_MODEL_3_PRO_IMAGE_PREVIEW = 'gemini-3-pro-image-preview'

/**
 * Display metadata for Gemini models, keyed by the model id constants declared
 * in this module.
 *
 * `label` is the human-readable model name; `description` is optional and is
 * only provided here for the two image-preview models.
 *
 * The key type is plain `string`, so looking up an id that is not listed
 * yields `undefined` at runtime even though the type does not say so.
 */
export const MODEL_METADATA: Record<string, { label: string; description?: string }> = {
[GEMINI_MODEL_2_5_PRO]: { label: 'Gemini 2.5 Pro' },
[GEMINI_MODEL_2_5_FLASH]: { label: 'Gemini 2.5 Flash' },
[GEMINI_MODEL_2_5_FLASH_LITE]: { label: 'Gemini 2.5 Flash Lite' },
[GEMINI_MODEL_3_FLASH_PREVIEW]: { label: 'Gemini 3 Flash Preview' },
[GEMINI_MODEL_3_1_FLASH_IMAGE_PREVIEW]: {
label: 'Nano Banana 2',
description: 'Pro-level visual intelligence with Flash-speed efficiency and reality-grounded generation capabilities.'
},
[GEMINI_MODEL_3_PRO_IMAGE_PREVIEW]: {
label: 'Nano Banana Pro',
description: 'State-of-the-art image generation and editing model.'
}
}

export const DEFAULT_MODEL_SETTINGS: ModelSettings = {
pricing: {
Expand All @@ -24,7 +40,7 @@ export const DEFAULT_MODEL_SETTINGS: ModelSettings = {
input: {
text: 0.30,
audio: 1.00,
standard: 0.30 // Fallback for simple calc
standard: 0.30
},
output: {
standard: 2.50
Expand Down Expand Up @@ -58,6 +74,15 @@ export const DEFAULT_MODEL_SETTINGS: ModelSettings = {
standard: 3.00,
image: 0.0672
}
},
[GEMINI_MODEL_3_PRO_IMAGE_PREVIEW]: {
input: {
standard: 2.00
},
output: {
standard: 12.00,
image: 0.134
}
}
},
selection: {
Expand Down
48 changes: 37 additions & 11 deletions src/main/gemini/adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,15 @@ export class GeminiAdapter {
}

/**
 * Converts the `usageMetadata` of a Gemini response into a `Usage` record.
 *
 * A falsy `response` yields an all-zero record, and a response without
 * `usageMetadata` falls back to an all-zero metadata object.
 *
 * NOTE(review): two variants of the field mapping appear back to back in the
 * returned object below - one using non-null assertions (`!`) and one that
 * defaults each count to 0 with `||`. Presumably these are the before/after
 * sides of the change; confirm which one is live before relying on either.
 *
 * @param response Raw response object from the Gemini client (untyped).
 * @returns Token counts for prompt, candidates, thinking and total.
 */
private extractUsage(response: any): Usage {
if (!response) {
return { promptTokens: 0, candidatesTokens: 0, thinkingTokens: 0, totalTokens: 0 };
}
const usage = response.usageMetadata || { promptTokenCount: 0, candidatesTokenCount: 0, totalTokenCount: 0, thoughtsTokenCount: 0 };
return {
promptTokens: usage.promptTokenCount!,
candidatesTokens: usage.candidatesTokenCount!,
thinkingTokens: usage.thoughtsTokenCount,
totalTokens: usage.totalTokenCount!
promptTokens: usage.promptTokenCount || 0,
candidatesTokens: usage.candidatesTokenCount || 0,
thinkingTokens: usage.thoughtsTokenCount || 0,
totalTokens: usage.totalTokenCount || 0
};
}

Expand Down Expand Up @@ -126,26 +129,49 @@ export class GeminiAdapter {
modelName: string,
userPrompt: string,
systemInstruction?: string,
signal?: AbortSignal
signal?: AbortSignal,
imagePaths?: string[]
): Promise<{ text: string, record: UsageRecord }> {
const request: GenerateContentParameters = {
const parts: any[] = []

// Add image parts if provided
const validImagePaths: string[] = []
if (imagePaths && imagePaths.length > 0) {
for (const imgPath of imagePaths) {
if (fs.existsSync(imgPath)) {
const data = fs.readFileSync(imgPath).toString('base64')
parts.push({
inlineData: {
data,
mimeType: 'image/jpeg'
}
})
validImagePaths.push(imgPath)
}
}
}

// Add text part LAST
parts.push({ text: userPrompt })

const request: any = {
model: modelName,
contents: [{ role: 'user', parts: [{ text: userPrompt }] }]
contents: [{ role: 'user', parts }]
};

if (systemInstruction) {
request.config = {
systemInstruction: systemInstruction
request.systemInstruction = {
parts: [{ text: systemInstruction }]
};
}

try {
const response = await this.withRetry(
() => (this.client.models as any).generateContent(request, { signal }) as Promise<any>,
() => (this.client as any).models.generateContent(request, { signal }),
signal
);
const usage = this.extractUsage(response);
const cost = GeminiAdapter.calculateCost(modelName, usage);
const cost = GeminiAdapter.calculateCost(modelName, usage, 0, validImagePaths.length);

return {
text: this.extractResultText(response),
Expand Down
58 changes: 54 additions & 4 deletions src/main/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { checkFFmpegAvailability, getVideoMetadata } from './ffmpeg'
import { checkScenedetectAvailability } from './scenedetect'
import { checkYtDlpAvailability, downloadVideo, getVideoFormats } from './ytdlp'
import { THREAD_DIRS } from './constants/paths'
import { GEMINI_MODEL_2_5_FLASH, MODEL_METADATA } from './constants/gemini'
import { electronApp, optimizer, is } from '@electron-toolkit/utils'

const activePipelines = new Map<string, Pipeline>()
Expand Down Expand Up @@ -310,6 +311,10 @@ app.whenReady().then(() => {
return settingsManager.resetModelSettings()
})

// IPC: hand the static model label/description table to whoever invokes
// the 'get-model-metadata' channel.
ipcMain.handle('get-model-metadata', () => MODEL_METADATA)

// Thread Management
ipcMain.handle('create-thread', async (_event, { videoPath, videoName, imagePaths }) => {
const newThread = await threadManager.createThread(videoPath, videoName, imagePaths)
Expand Down Expand Up @@ -375,9 +380,9 @@ app.whenReady().then(() => {
return await threadManager.toggleThreadReferenceFrame(threadId, filePath)
})

ipcMain.handle('show-confirmation', async (_event, { title, message, detail, type = 'question', buttons = ['Cancel', 'Yes'], defaultId = 1, cancelId = 0 }) => {
ipcMain.handle('show-confirmation', async (_event, { title, message, detail, type = 'question', buttons = ['Cancel', 'Yes'], defaultId = 1, cancelId = 0, checkboxLabel }) => {
const focusedWindow = BrowserWindow.getFocusedWindow()
if (!focusedWindow) return cancelId
if (!focusedWindow) return { response: cancelId, checkboxChecked: false }

const result = await dialog.showMessageBox(focusedWindow, {
type: type as any,
Expand All @@ -386,10 +391,14 @@ app.whenReady().then(() => {
cancelId,
title,
message,
detail
detail,
checkboxLabel
})

return result.response
return {
response: result.response,
checkboxChecked: result.checkboxChecked
}
})

ipcMain.handle('save-video', async (_event, sourcePath: string) => {
Expand Down Expand Up @@ -457,6 +466,47 @@ app.whenReady().then(() => {
return result.path
})

/**
 * IPC handler 'improvise-message': asks Gemini to rewrite one message of a
 * thread into a more descriptive generation prompt.
 *
 * Payload: `{ threadId, messageId }`.
 * Resolves with the rewritten prompt text (`result.text`).
 * Rejects with 'Thread not found' / 'Message not found' when the lookup fails;
 * errors raised by `generateText` are not caught here and propagate to the caller.
 *
 * Side effect: the usage record of the call is stored against the same
 * message via `threadManager.updateMessageUsage`.
 */
ipcMain.handle('improvise-message', async (_event, { threadId, messageId }) => {
// NOTE(review): debug logging on every invocation - confirm it is meant to stay.
console.log(`[DEBUG IPC] improvise-message called: threadId=${threadId}, messageId=${messageId}`)

const thread = threadManager.getThread(threadId)
if (!thread) throw new Error('Thread not found')

const message = thread.messages.find(m => m.id === messageId)
if (!message) throw new Error('Message not found')

// Get the history leading up to this message (including the message itself)
const { text: context } = threadManager.getBranchContext(threadId, messageId)

// Collect attached images from THIS message only for visual context in improvisation
const attachedImages = message.attachedImages || []

// The model is taken from the 'intent' selection slot, falling back to
// Gemini 2.5 Flash when that slot is unset.
const modelSettings = settingsManager.getModelSettings()
const modelName = modelSettings.selection['intent'] || GEMINI_MODEL_2_5_FLASH
const adapter = GeminiAdapter.create()

const systemInstruction = `You are a high-level Prompt Engineer. Your task is to IMPROVISE and REWRITE a user's latest prompt to be much more descriptive, detailed, and effective for high-fidelity AI generation.
- The conversation history is provided ONLY for context. You MUST NOT improve the history, only the latest prompt.
- Use the attached images for visual context to make the prompt more vivid and accurate.
- Maintain the original intent exactly.
- USE natural, descriptive, and professional language.
- DO NOT return JSON, technical metadata, coordinates, bounding boxes, or "box_2d" tags.
- DO NOT return object detection results.
- DO NOT include any explanations, labels like "Improvised:", or surrounding quotes.
- Return ONLY the clean, improved prompt text itself.`

const userPrompt = `[CONTEXT/HISTORY]:\n${context}\n\n[TASK]: Based on the context above and the attached images, rewrite the user's latest message to be a high-performance, descriptive generation prompt. Return only the improved text.`

// No AbortSignal is passed (4th argument is undefined), so this call cannot
// be cancelled from here; the image paths go in as the 5th argument.
const result = await adapter.generateText(modelName, userPrompt, systemInstruction, undefined, attachedImages)

// Track usage for the message that initiated it
if (result.record) {
await threadManager.updateMessageUsage(threadId, messageId, result.record)
}

return result.text
})

app.on('activate', function () {
if (BrowserWindow.getAllWindows().length === 0) createWindow()
})
Expand Down
53 changes: 51 additions & 2 deletions src/main/threads/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class ThreadManager {
messages: [],
versionCounter: 0,
videoMetadata,
usageHistory: [],
createdAt: Date.now(),
updatedAt: Date.now()
}
Expand Down Expand Up @@ -277,6 +278,11 @@ class ThreadManager {
// Try to repair paths if they seem broken/moved
thread = this.repairThreadPaths(thread)

// Migrate usage if needed
if (this.migrateThreadUsage(thread)) {
this.saveThread(thread)
}

// Mark as missing if artifacts directory is not found, but DO NOT DELETE metadata.
// This allows the user to repair the path or recover files manually.
thread.missing = !!(thread.tempDir && !fs.existsSync(thread.tempDir))
Expand All @@ -300,7 +306,14 @@ class ThreadManager {

try {
const content = fs.readFileSync(filePath, 'utf-8')
return JSON.parse(content)
const thread = JSON.parse(content) as Thread

// Migrate usage if needed
if (this.migrateThreadUsage(thread)) {
this.saveThread(thread)
}

return thread
} catch (error) {
console.error(`Failed to read thread ${id}:`, error)
return null
Expand Down Expand Up @@ -460,8 +473,18 @@ class ThreadManager {

// Update usage and cost for a message
async updateMessageUsage(threadId: string, messageId: string, record: UsageRecord): Promise<boolean> {
const thread = this.getThread(threadId)
if (!thread) return false

const historyRecord: UsageRecord = {
...record,
timestamp: Date.now(),
messageId
}

const result = await this.updateThread(threadId, {
messages: (this.getThread(threadId)?.messages || []).map(m => {
usageHistory: [...(thread.usageHistory || []), historyRecord],
messages: (thread.messages || []).map(m => {
if (m.id !== messageId) return m

const newUsage: Usage = {
Expand Down Expand Up @@ -677,6 +700,32 @@ class ThreadManager {

return !!result
}

/**
 * Backfills `thread.usageHistory` from the per-message `usage` / `cost`
 * fields when the thread has no usage history yet.
 *
 * The thread object is mutated in place; persisting it is the caller's job.
 *
 * @param thread Thread to migrate.
 * @returns `true` when `thread.usageHistory` was (re)assigned and the thread
 *          should be saved, `false` when nothing changed.
 */
private migrateThreadUsage(thread: Thread): boolean {
  // A populated history is authoritative - never overwrite it.
  if (thread.usageHistory && thread.usageHistory.length > 0) return false

  // Thread files are read straight from JSON, so `messages` may be absent in
  // old or damaged files. Treat that as "no messages" instead of throwing,
  // matching the `thread.messages || []` guard in updateMessageUsage.
  const messages = thread.messages ?? []

  const history: UsageRecord[] = []
  for (const msg of messages) {
    // Only messages that actually incurred a cost are worth a history entry.
    if (msg.usage && msg.cost && msg.cost > 0) {
      history.push({
        usage: msg.usage,
        cost: msg.cost,
        timestamp: msg.createdAt,
        messageId: msg.id
      })
    }
  }

  // Write when usage was found, or when the field is missing altogether.
  // `== null` also covers a literal JSON `null`, so the field is initialised
  // to an array in that case too.
  if (history.length > 0 || thread.usageHistory == null) {
    thread.usageHistory = history
    return true
  }

  return false
}
}

export const threadManager = new ThreadManager()
41 changes: 23 additions & 18 deletions src/main/timeline/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,17 @@ Task: Provide a list of indices representing the new timeline after applying the
`;

try {
const { text: responseText, record } = await geminiAdapter.generateText(modelName, prompt, systemInstruction, signal);
console.log(`Gemini response (Edit Mode):`, responseText);
const { data: indices, record } = await geminiAdapter.generateStructuredText<number[]>(
modelName,
prompt,
INDICES_SCHEMA,
systemInstruction,
signal
);
console.log(`Gemini response (Edit Mode):`, indices);

// Record usage for the edit call
onRecordUsage?.(record);

const indices = parseIndicesFromResponse(responseText);
const newTimeline: EnrichedTimelineSegment[] = [];

for (const idx of indices) {
Expand Down Expand Up @@ -251,14 +255,18 @@ Task: Pick the next 3 segments to add to the timeline.
`;

try {
const { text: responseText, record } = await geminiAdapter.generateText(modelName, prompt, systemInstruction, signal);
const { data: indices, record } = await geminiAdapter.generateStructuredText<number[]>(
modelName,
prompt,
INDICES_SCHEMA,
systemInstruction,
signal
);

// Record usage IMMEDIATELY after call finishes, before any abort checks
onRecordUsage?.(record);

console.log(`Gemini response (Iteration ${iterationCount}):`, responseText);

const indices = parseIndicesFromResponse(responseText);
console.log(`Gemini response (Iteration ${iterationCount}):`, indices);

if (indices.length === 0) {
console.warn("No indices returned from Gemini, stopping loop.");
Expand Down Expand Up @@ -338,13 +346,10 @@ function formatEnrichedTranscript(items: EnrichedTimelineSegment[]): string {
}


/**
 * Pulls the first bracketed list of integers (e.g. `[0, 3, 7]`) out of a
 * free-form model response.
 *
 * Only digits, commas and whitespace are accepted between the brackets.
 * Entries that do not parse as a number (such as the empty slot in `[1,,2]`)
 * are dropped.
 *
 * @param text Raw response text.
 * @returns The parsed indices in order, or an empty array when no list is found.
 */
function parseIndicesFromResponse(text: string): number[] {
  const bracketed = /\[([\d,\s]+)\]/.exec(text);
  if (bracketed === null) {
    return [];
  }

  const indices: number[] = [];
  for (const token of bracketed[1].split(',')) {
    // Tokens hold only digits and whitespace, so base 10 is the only reading.
    const value = parseInt(token.trim(), 10);
    if (!Number.isNaN(value)) {
      indices.push(value);
    }
  }
  return indices;
}
/**
 * Response schema handed to `generateStructuredText` so that Gemini answers
 * with a plain JSON array of integers (the segment indices) rather than
 * free-form text.
 */
const INDICES_SCHEMA = {
type: 'ARRAY',
items: { type: 'INTEGER' }
};
5 changes: 4 additions & 1 deletion src/preload/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ const api = {
getVideoMetadata: (filePath: string) => ipcRenderer.invoke('get-video-metadata', filePath),
showOpenDialog: (options: any) => ipcRenderer.invoke('show-open-dialog', options),
upscaleImage: (data: { threadId: string, messageId: string, imagePath: string, upscaleFactor: string }) =>
ipcRenderer.invoke('upscale-image', data)
ipcRenderer.invoke('upscale-image', data),
improviseMessage: (threadId: string, messageId: string) =>
ipcRenderer.invoke('improvise-message', { threadId, messageId }),
getModelMetadata: () => ipcRenderer.invoke('get-model-metadata')
}

if (process.contextIsolated) {
Expand Down
3 changes: 2 additions & 1 deletion src/renderer/src/assets/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
}

/*
 * Hover treatment for glass cards: applies the hover shadow and a tinted
 * border colour.
 *
 * NOTE(review): two variants are present below - an `@apply` that also pulls in
 * `transition-all duration-500`, and an `@apply` without it followed by an
 * explicit `transition` limited to box-shadow and border-color. Presumably
 * these are the before/after sides of the change; confirm which one is live.
 */
.glass-card-hover {
@apply hover:shadow-premium-hover hover:border-primary/30 transition-all duration-500;
@apply hover:shadow-premium-hover hover:border-primary/30;
transition: box-shadow 0.5s cubic-bezier(0.4, 0, 0.2, 1), border-color 0.5s cubic-bezier(0.4, 0, 0.2, 1);
}

.input-focus-ring {
Expand Down
Loading
Loading