From 2bd7f2e8b9de5b117dd355a8ff44f2d369d450f6 Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Tue, 2 Sep 2025 01:57:39 +0900 Subject: [PATCH 01/13] install xsai & deps --- package-lock.json | 232 +++++++++++++++++++++++++++++++++++++++++++++- package.json | 7 +- 2 files changed, 237 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 065c3db..e3d977b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,6 +16,8 @@ "@fortawesome/react-fontawesome": "^0.1.19", "@jokester/ts-commonutil": "^0.6.1", "@reduxjs/toolkit": "^1.9.7", + "@xsai-ext/providers-cloud": "^0.4.0-beta.2", + "@xsai/generate-object": "^0.4.0-beta.2", "@zip.js/zip.js": "^2.7.60", "antd": "^4.24.16", "antd-img-crop": "^3.16.0", @@ -49,7 +51,10 @@ "redux-saga": "^1.3.0", "store": "^2.0.12", "use-debounce": "^10.0.4", - "uuid": "^7.0.3" + "uuid": "^7.0.3", + "xsai": "^0.4.0-beta.2", + "zod": "^3.25.76", + "zod-to-json-schema": "^3.24.6" }, "devDependencies": { "@tsconfig/strictest": "^2.0.5", @@ -2944,6 +2949,156 @@ "resolved": "https://registry.npmjs.org/@xobotyi/scrollbar-width/-/scrollbar-width-1.9.5.tgz", "integrity": "sha512-N8tkAACJx2ww8vFMneJmaAgmjAG1tnVBZJRLRcx061tmsLRZHSEZSLuGWnwPtunsSLvSqXQ2wfp7Mgqg1I+2dQ==" }, + "node_modules/@xsai-ext/providers-cloud": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai-ext/providers-cloud/-/providers-cloud-0.4.0-beta.2.tgz", + "integrity": "sha512-kquc/gLHZzBevdSbpRIlLr6jBHToVbvVIhjeUtqMuGcL613l9A9CJ2CSlnHmrgxHZQSSPIBeUAl0WEG4sWsP9g==", + "license": "MIT", + "dependencies": { + "@xsai-ext/shared-providers": "~0.4.0-beta.2", + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai-ext/shared-providers": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai-ext/shared-providers/-/shared-providers-0.4.0-beta.2.tgz", + "integrity": "sha512-+GEct6b9Q1/4o9NpoL8+aviZPiV5EU/r100F6gNlSk9QweWCqlfAUCnS9eAIWbGJ3fZcBADGxWyQE6Sk6a3LGQ==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/embed": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/embed/-/embed-0.4.0-beta.2.tgz", + "integrity": "sha512-9tl8WZvIbqjMidOvtDTeGMoeK0d8i6Wz7T6NEHwFuWt4ZLeFn3PXjx7Sm5F/607ByBs1mp6p7P4KRA0kR3ma4Q==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/generate-image": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/generate-image/-/generate-image-0.4.0-beta.2.tgz", + "integrity": "sha512-pxpiWW7NqBQkzREKByADM9l5Q+15an/K4RW5zorM2D2koqnK09pNH7jxMOJZwsjbTQE1+h38MwhEeJXdstokEw==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/generate-object": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/generate-object/-/generate-object-0.4.0-beta.2.tgz", + "integrity": "sha512-nkcY2Mn01s7p0SiNhYUlsrrrrOUgEQZtnGpfTfefAi0bynxXLVg//MEpm3tS4WZUpQvcZZRjTgMU91tdEyHxmQ==", + "license": "MIT", + "dependencies": { + "@xsai/generate-text": "~0.4.0-beta.2", + "xsschema": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/generate-speech": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/generate-speech/-/generate-speech-0.4.0-beta.2.tgz", + "integrity": "sha512-DitmNQYkTbz6a4btBFDZOlNxs2tU0JuE60r4FjaNDU1kpI5X2Ah49kfcCQya9i+3RnXDcgPMUzAd1zgOmFEkGw==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/generate-text": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/generate-text/-/generate-text-0.4.0-beta.2.tgz", + "integrity": "sha512-H0Fq8+O/8zJpNiwW4+PjUYQfrZlfh0DFvUmPg3wPFdQULZICKhMbxP/adZTIkXw+w7hXwd2Uho8aYjhzrEIfUg==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2", + "@xsai/shared-chat": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/generate-transcription": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/generate-transcription/-/generate-transcription-0.4.0-beta.2.tgz", + "integrity": "sha512-LVUM5Ew7GEuSUn5H9Gvz14YLHn/T2Dc/RngdEvYg+HNAm9CsLq51A5T6aqEpkkK2csAOsMvtaHEoEIYSKISDAQ==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/model": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/model/-/model-0.4.0-beta.2.tgz", + "integrity": "sha512-gNfCbfdYw3mCi9OUMe4OGVZ3I752QveOndMVT/99VymC0c8albdKLBGT8UgRLrW6bKTl1Vx9Vy0kausZWxo6jw==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/shared": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/shared/-/shared-0.4.0-beta.2.tgz", + "integrity": "sha512-nKdT+/gon1FxkEqv1iKfS2QRWnmYY/2o7Wl+Bcfot45qACE0sK9E4nw2BeLI/MeRYD6w7bTLP2J2U8373aHdYA==", + "license": "MIT" + }, + "node_modules/@xsai/shared-chat": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/shared-chat/-/shared-chat-0.4.0-beta.2.tgz", + "integrity": "sha512-2+HX5XEiC4x17NvtlIGTA/aOH9/EyJ2bD/gS+nmbiU8zuPykffS5EJ7CwuBt8rWTQUXEDHFA6hRpNLG7Q64EfA==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/stream-object": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/stream-object/-/stream-object-0.4.0-beta.2.tgz", + "integrity": "sha512-FBjVEVs6HMS5U7RMXgzx3h+3p7M6dnDVP2dzL4oP8RznFYvat1tU/tZWEER0pdLa/ny7b3thzH36z07DS0wzeQ==", + "license": "MIT", + "dependencies": { + "@xsai/stream-text": "~0.4.0-beta.2", + "xsschema": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/stream-text": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/stream-text/-/stream-text-0.4.0-beta.2.tgz", + "integrity": "sha512-16jQfXZ6RTw5JsN6zxeJ4At1CMsCrZUGJLvVsRmdfMRVjhxEXGGSvK1meMnWPKA4xtf+UEQCvwtJW6t8YQlK/w==", + "license": "MIT", + "dependencies": { + "@xsai/shared-chat": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/tool": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/tool/-/tool-0.4.0-beta.2.tgz", + "integrity": "sha512-yp+nD6/l6pHwr8LYYckEub/+ZDz2NkSRVwguU9Uv1nlIPPi5OR4txiF8LnjC35msJptCqKioFki+FkWiYd32WA==", + "license": "MIT", + "dependencies": { + "@xsai/shared": "~0.4.0-beta.2", + "@xsai/shared-chat": "~0.4.0-beta.2", + "xsschema": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/utils-chat": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/utils-chat/-/utils-chat-0.4.0-beta.2.tgz", + "integrity": "sha512-VgB9ohysQFUA6mogvP0e3R24Tr+RH0VKZ28Yx0Y372ptzo3hXjFjlftnpXCdNOtY1usiujVuC17WuryhV041aQ==", + "license": "MIT", + "dependencies": { + "@xsai/shared-chat": "~0.4.0-beta.2" + } + }, + "node_modules/@xsai/utils-reasoning": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/utils-reasoning/-/utils-reasoning-0.4.0-beta.2.tgz", + "integrity": "sha512-JFzgRVppyEPadqkNIbsrdViwZxY7/BGuWvFdb9zmaPiGLrOB1O6vCRs2GHMKAcJHvz1j7klDEc2JoMOcJtJXmQ==", + "license": "MIT" + }, + "node_modules/@xsai/utils-stream": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/@xsai/utils-stream/-/utils-stream-0.4.0-beta.2.tgz", + "integrity": "sha512-4+ecBLGZ7LMPHEvz6QSFVkdZLLlgieycDtPSJgvZxy6sNLekuhFzTDdhJ7Q717zZt8oCyYnBSG2m+moBwwe/2g==", + "license": "MIT" + }, "node_modules/@zip.js/zip.js": { "version": "2.7.60", "resolved": "https://registry.npmjs.org/@zip.js/zip.js/-/zip.js-2.7.60.tgz", @@ -13111,6 +13266,63 @@ "node": "^12.13.0 || ^14.15.0 || >=16.0.0" } }, + "node_modules/xsai": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/xsai/-/xsai-0.4.0-beta.2.tgz", + "integrity": "sha512-AXThpC7TkdA1vcZ0+xEwsp5WK6bt5S0UY9N0bh8EHVZS0lWzjrLk3AYGhTnHNRSc8okQekXDBXEWN1jj0fet1A==", + "license": "MIT", + "dependencies": { + "@xsai/embed": "~0.4.0-beta.2", + "@xsai/generate-image": "~0.4.0-beta.2", + "@xsai/generate-object": "~0.4.0-beta.2", + "@xsai/generate-speech": "~0.4.0-beta.2", + "@xsai/generate-text": "~0.4.0-beta.2", + "@xsai/generate-transcription": "~0.4.0-beta.2", + "@xsai/model": "~0.4.0-beta.2", + "@xsai/shared": "~0.4.0-beta.2", + "@xsai/shared-chat": "~0.4.0-beta.2", + "@xsai/stream-object": "~0.4.0-beta.2", + "@xsai/stream-text": "~0.4.0-beta.2", + "@xsai/tool": "~0.4.0-beta.2", + "@xsai/utils-chat": "~0.4.0-beta.2", + "@xsai/utils-reasoning": "~0.4.0-beta.2", + "@xsai/utils-stream": "~0.4.0-beta.2" + } + }, + "node_modules/xsschema": { + "version": "0.4.0-beta.2", + "resolved": "https://registry.npmjs.org/xsschema/-/xsschema-0.4.0-beta.2.tgz", + "integrity": "sha512-bzwAHTao5dcEy+GM/mVPbrWuFslUCPizvMjrLqV0PTDNw1jIUuYc+eNdSVl7+vp5RtA5DfWAf+qdTCJitQoCSw==", + "license": "MIT", + "peerDependencies": { + "@valibot/to-json-schema": "^1.0.0", + "arktype": "^2.1.20", + "effect": "^3.16.0", + "sury": "^10.0.0", + "zod": "^3.25.0 || ^4.0.0", + "zod-to-json-schema": "^3.24.5" + }, + "peerDependenciesMeta": { + "@valibot/to-json-schema": { + "optional": true + }, + "arktype": { + "optional": true + }, + "effect": { + "optional": true + }, + "sury": { + "optional": true + }, + "zod": { + "optional": true + }, + "zod-to-json-schema": { + "optional": true + } + } + }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", @@ -13183,6 +13395,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.24.6", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.6.tgz", + "integrity": "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.24.1" + } + }, "node_modules/zscroller": { "version": "0.4.8", "resolved": "https://registry.npmjs.org/zscroller/-/zscroller-0.4.8.tgz", diff --git a/package.json b/package.json index 85e9fe8..ea800b6 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,8 @@ "@fortawesome/react-fontawesome": "^0.1.19", "@jokester/ts-commonutil": "^0.6.1", "@reduxjs/toolkit": "^1.9.7", + "@xsai-ext/providers-cloud": "^0.4.0-beta.2", + "@xsai/generate-object": "^0.4.0-beta.2", "@zip.js/zip.js": "^2.7.60", "antd": "^4.24.16", "antd-img-crop": "^3.16.0", @@ -43,7 +45,10 @@ "redux-saga": "^1.3.0", "store": "^2.0.12", "use-debounce": "^10.0.4", - "uuid": "^7.0.3" + "uuid": "^7.0.3", + "xsai": "^0.4.0-beta.2", + "zod": "^3.25.76", + "zod-to-json-schema": "^3.24.6" }, "scripts": { "build": "vite build", From 26446ee877e3e57906db8a75d47d9d6f7ffb9f0a Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Tue, 2 Sep 2025 02:18:52 +0900 Subject: [PATCH 02/13] remove old code & add multimodal_recognize service --- .../{project => ai}/FileListAiTranslate.tsx | 0 src/services/ai/TranslateCompanion.tsx | 320 ------------------ src/services/ai/mit_preprocess.ts | 97 ------ src/services/ai/multimodal_recognize.ts | 39 ++- src/services/ai/use_moeflow_companion.ts | 107 ------ src/services/labelplus_packager.ts | 80 ----- 6 files changed, 37 insertions(+), 606 deletions(-) rename src/components/{project => ai}/FileListAiTranslate.tsx (100%) delete mode 100644 src/services/ai/TranslateCompanion.tsx delete mode 100644 src/services/ai/mit_preprocess.ts delete mode 100644 src/services/ai/use_moeflow_companion.ts delete mode 100644 src/services/labelplus_packager.ts diff --git a/src/components/project/FileListAiTranslate.tsx b/src/components/ai/FileListAiTranslate.tsx similarity index 100% rename from src/components/project/FileListAiTranslate.tsx rename to src/components/ai/FileListAiTranslate.tsx diff --git a/src/services/ai/TranslateCompanion.tsx b/src/services/ai/TranslateCompanion.tsx deleted file mode 100644 index 7688434..0000000 --- a/src/services/ai/TranslateCompanion.tsx +++ /dev/null @@ -1,320 +0,0 @@ -import { FC } from '@/interfaces'; -import { RefObject, useRef, useState } from 'react'; -import { FilePond } from 'react-filepond'; -import { css } from '@emotion/core'; -import { Button } from '@/components/shared/Button'; -import { createMoeflowProjectZip, LPFile } from '../labelplus_packager'; -import { FailureResults } from '@/apis'; -import { measureImgSize } from '@jokester/ts-commonutil/lib/web/measure-img'; -import { clamp } from 'lodash-es'; -import { BBox, mitPreprocess, TextQuad } from './mit_preprocess'; -import { ResourcePool } from '@jokester/ts-commonutil/lib/concurrency/resource-pool'; - -const MAX_FILE_COUNT = 30; - -function getQuadCenter(q: TextQuad) { - const xs = q.pts.flatMap((pt) => pt.map((p) => p[0])); - const ys = q.pts.flatMap((pt) => pt.map((p) => p[1])); - const minX = Math.min(...xs); - const maxX = Math.max(...xs); - const minY = Math.min(...ys); - const maxY = Math.max(...ys); - return { - x: (minX + maxX) / 2, - y: (minY + maxY) / 2, - }; -} - -function buildLpFile( - img: File, - size: { width: number; height: number }, - textQuads: TextQuad[], -): LPFile { - const labels = textQuads - .sort((a, b) => { - // sort : top=>bottom , right=>left - const ca = getQuadCenter(a); - const cb = getQuadCenter(b); - return Math.sign(ca.y - cb.y) || Math.sign(cb.x - ca.x); - }) - .map((q) => { - const { x, y } = getQuadCenter(q); - return { - x: clamp(x / size.width, 0, 1), - y: clamp(y / size.height, 0, 1), - position_type: 1, - translation: `${q.raw_text}\n${q.translated}`, - }; - }); - console.debug('labels', labels); - return { - file_name: img.name, - labels, - }; -} - -async function translateWithTask( - text: string, - targetLang = 'CHT', -): Promise { - const task = await mitPreprocess.createTranslateTask({ - query: text, - target_lang: targetLang, - translator: 'gpt4', - }); - const result = await mitPreprocess.waitTranslateTask(task.data.task_id); - return result[0] || ''; -} - -async function* startTranslateFile( - image: File, - running: RefObject, -): AsyncGenerator<{ - progress?: string; - failed?: FailureResults; - detectTextResult?: unknown; - ocrResult?: unknown; - translateResult?: unknown; - result?: LPFile; -}> { - let uploaded; - yield { progress: 'uploading' }; - try { - uploaded = await mitPreprocess.uploadImg(image); - } catch (e: unknown) { - yield { - failed: e as FailureResults, - }; - return; - } - yield { progress: 'extracting text lines' }; - const { filename } = uploaded.data; - - let detectTextResult; - try { - const task = await mitPreprocess.createImgTask( - filename, - 'mit_detect_text', - {}, - ); - detectTextResult = await mitPreprocess.waitImgTask<{ - textlines: { - prob: number; - pts: BBox[]; - text: string; - // textlines: any[]; // FIXME why did server return this? - }[]; - }>(task.data.task_id); - } catch (e: unknown) { - yield { - failed: e as FailureResults, - }; - return; - } - - yield { progress: 'recognizing text lines' }; - let ocrResult; - try { - const created = await mitPreprocess.createImgTask(filename, 'mit_ocr', { - regions: detectTextResult.textlines, - }); - ocrResult = await mitPreprocess.waitImgTask< - { - pts: BBox[]; - text: string; - textlines: string[]; - }[] - >(created.data.task_id); - console.debug('ocrResult', ocrResult); - } catch (e: unknown) { - yield { - failed: e as FailureResults, - }; - return; - } - - yield { progress: 'translating' }; - let translateResult: string[]; - try { - const limiter = ResourcePool.multiple([1, 2, 3, 4]); - translateResult = await Promise.all( - ocrResult.map((textBlock) => - limiter.use(() => translateWithTask(textBlock.text)), - ), - ); - } catch (e: unknown) { - yield { - failed: e as FailureResults, - }; - return; - } - - const textQuads: TextQuad[] = ocrResult.map((textBlock, i) => ({ - pts: textBlock.pts, - raw_text: textBlock.text, - translated: translateResult[i] ?? '', - })); - - const lpFile = buildLpFile(image, await measureImgSize(image), textQuads); - - yield { - result: lpFile, - }; -} - -async function translateFile(image: File, imageIndex: number): Promise { - try { - for await (const fileProgress of startTranslateFile(image, { - current: true, - })) { - console.debug( - `translating file #${imageIndex} / ${image.name}`, - 'step', - fileProgress, - ); - if (fileProgress.result) { - return fileProgress.result; - } else if (fileProgress.failed) { - throw fileProgress.failed; - } // else: continue - } - } catch (e) { - console.error(`failed translating file #${imageIndex} / ${image.name}`, e); - return { - file_name: image.name, - labels: [], - }; - } - throw new Error(`should not be here`); -} - -async function startOcr( - files: File[], - onProgress?: (finished: number, total: number) => void, -): Promise { - const limiter = ResourcePool.multiple([1, 2]); - - const translations = await Promise.all( - files.map((f, i) => - limiter.use(async () => { - const lpFile = await translateFile(f, i); - onProgress?.(i + 1, files.length); - return lpFile; - }), - ), - ); - const zipBlob = await createMoeflowProjectZip( - { - name: `${files[0]!.name}`, - intro: `这是由<萌翻+Mit demo>生成的项目. https://moeflow-mit-poc.voxscape.io/temp/mit-preprocess`, - default_role: 'supporter', - allow_apply_type: 3, - application_check_type: 1, - is_need_check_application: true, - source_language: 'ja', - output_language: 'zh-TW', - }, - translations.map((lp, i) => ({ lp, image: files[i] })), - ); - return new File( - [zipBlob], - `moeflow-project-${Date.now()}-${files[0]!.name}.zip`, - ); -} - -interface DemoWorkingState { - nonce: string; - numPages: number; - finished: number; -} - -export const DemoOcrFiles: FC<{}> = (props) => { - const [working, setWorking] = useState(null); - const [origFiles, setOrigFiles] = useState(() => []); - const [error, setError] = useState(null); - const [translated, setTranslated] = useState(null); - const filePondRef = useRef(null); - - const onStartOcr = async (files: File[]) => { - try { - const initState = { - nonce: `${Math.random()}`, - numPages: files.length, - finished: 0, - }; - setWorking(initState); - setTranslated( - await startOcr(files, (finished, total) => - setWorking((s) => - s?.nonce === initState.nonce - ? { - ...s, - finished: Math.max(s.finished, finished), - numPages: total, - } - : s, - ), - ), - ); - } catch (e: any) { - alert(e?.message || 'error'); - console.error(e); - } finally { - setWorking(null); - } - }; - return ( -
- 0} - ref={(value) => (filePondRef.current = value)} - css={css` - display: none; - `} - allowMultiple - acceptedFileTypes={['image/*', '.png', '.jpg']} - onupdatefiles={(_files) => { - const files = _files.map((f) => f.file) as File[]; - console.debug('onaddfile', files); - if (!(files.length > 0 && files.length <= MAX_FILE_COUNT)) { - setError(`一次最多只能上传${MAX_FILE_COUNT}张图片`); - setOrigFiles([]); - filePondRef.current!.removeFiles(); - } else { - setOrigFiles(files); - setError(null); - } - }} - /> - - - -
- ); -}; diff --git a/src/services/ai/mit_preprocess.ts b/src/services/ai/mit_preprocess.ts deleted file mode 100644 index f1574a5..0000000 --- a/src/services/ai/mit_preprocess.ts +++ /dev/null @@ -1,97 +0,0 @@ -import { request } from '../../apis'; -import { uploadRequest } from '../../apis/_request'; -import { wait } from '@jokester/ts-commonutil/lib/concurrency/timing'; - -const mitApiPrefix = `/v1/mit`; - -export type CoordPair = [number, number]; // x, y in non-normalized pixels -export type BBox = [CoordPair, CoordPair, CoordPair, CoordPair]; // left-top, right-top, right-bottom, left-bottom - -export interface TextQuad { - pts: BBox[]; - raw_text: string; - translated: string; -} - -async function uploadImg(file: File) { - const formData = new FormData(); - formData.append('file', file); - - return uploadRequest<{ filename: string }>(formData, { - method: 'POST', - url: `${mitApiPrefix}/images`, - }); -} - -async function createImgTask( - filename: string, - taskName: 'mit_ocr' | 'mit_detect_text', - payload: object, -) { - return request<{ task_id: string }>({ - method: 'POST', - url: `${mitApiPrefix}/image-tasks`, - data: { - task_name: taskName, - filename, - ...payload, - }, - }); -} - -interface TaskState { - task_id: string; - status: 'success' | 'pending' | 'fail'; - result?: Result; - message?: string; -} - -async function waitImgTask(taskId: string) { - while (true) { - const r = await request>({ - method: 'GET', - url: `${mitApiPrefix}/image-tasks/${taskId}`, - }); - if (r.data.status === 'success') { - return r.data.result!; - } else if (r.data.status === 'pending') { - await wait(2e3); - } else { - throw new Error(`task failed: ${r.data.message ?? 'unknown'}`); - } - } -} - -async function createTranslateTask(payload: object) { - return request<{ task_id: string }>({ - method: 'POST', - url: `${mitApiPrefix}/translate-tasks`, - data: { - ...payload, - }, - }); -} - -async function waitTranslateTask(taskId: string) { - while (true) { - const r = await request>({ - method: 'GET', - url: `${mitApiPrefix}/translate-tasks/${taskId}`, - }); - if (r.data.status === 'success') { - return r.data.result!; - } else if (r.data.status === 'pending') { - await wait(1e3); - } else { - throw new Error(`task failed: ${r.data.message ?? 'unknown'}`); - } - } -} - -export const mitPreprocess = { - uploadImg, - createImgTask, - waitImgTask, - createTranslateTask, - waitTranslateTask, -} as const; diff --git a/src/services/ai/multimodal_recognize.ts b/src/services/ai/multimodal_recognize.ts index 46bf401..4de688a 100644 --- a/src/services/ai/multimodal_recognize.ts +++ b/src/services/ai/multimodal_recognize.ts @@ -1,10 +1,13 @@ +import z from "zod"; +import { generateObject, GenerateObjectOptions, UserMessage} from 'xsai' + interface MultimodalModelConf { provider: string; model: string; baseUrl: string; } -export const multimodalPresets: readonly MultimodalModelConf[] = [ +export const multimodalPresets: readonly Readonly[] = [ // gemini: // see https://ai.google.dev/gemini-api/docs/openai { @@ -19,4 +22,36 @@ export const multimodalPresets: readonly MultimodalModelConf[] = [ }, ]; -export async function x(); +const fileRecognizeResultSchema = z.object({ + imageW: z.number({message: 'the width of the image in PX'}), + imageH: z.number({message: 'the height of the image in PX'}), + texts: z.array(z.object({ + left: z.number({message: 'left coordinate of the text in PX, in the whole image'}), + top: z.number({message: 'top coordinate of the text in PX, in the whole image'}), + width: z.number({message: 'width of the text in PX'}), + height: z.number({message: 'height of the text in PX'}), + textLines: z.array(z.string(), {message: 'the text lines'}), + text: z.string({message: 'concatencated text'}), + tranalated: z.string({message: 'translated text'}), + comment: z.string({message: 'additional comment of the text, or the translation'}), + })) +}) + +export async function recognizeFile(apiKey: string, conf: MultimodalModelConf, msg: UserMessage, abortSignal?: AbortSignal): Promise> { + const generateConf: GenerateObjectOptions = { + messages: [ + { content: 'You are a helpful assistant. Please do as user instructs.', role: 'system' }, + msg, + ], + schema: fileRecognizeResultSchema, + baseURL: conf.baseUrl, + model: conf.model, + apiKey, + } + const res = await generateObject({ + ...generateConf, + abortSignal + }) + return res.object +} + diff --git a/src/services/ai/use_moeflow_companion.ts b/src/services/ai/use_moeflow_companion.ts deleted file mode 100644 index 582b798..0000000 --- a/src/services/ai/use_moeflow_companion.ts +++ /dev/null @@ -1,107 +0,0 @@ -import { useState, useRef } from 'react'; -import { Client } from '@gradio/client'; -import { useAsyncEffect } from '@jokester/ts-commonutil/lib/react/hook/use-async-effect'; -import { useSelector } from 'react-redux'; -import { AppState } from '@/store'; -import { createDebugLogger } from '@/utils/debug-logger'; -import { RuntimeConfig } from '@/configs'; - -export const moeflowCompanionServiceState = { - disabled: 'disabled', - connecting: 'connecting', - connected: 'connected', - disconnected: 'disconnected', -} as const; - -const debugLogger = createDebugLogger('service:moeflow_companion'); - -export interface MoeflowCompanionService { - client: Client; - serviceConf: RuntimeConfig['moeflowCompanion']; - multimodalTranslate: typeof multimodalTranslate; -} - -export function useMoeflowCompanion(): [ - string, - MoeflowCompanionService | null, -] { - const serviceRef = useRef(null); - const [clientState, setClientState] = useState( - moeflowCompanionServiceState.connecting, - ); - const serviceConf = useSelector( - (s: AppState) => s.site.runtimeConfig.moeflowCompanion, - ); - - useAsyncEffect( - async (_, released) => { - if ( - !( - serviceConf && - serviceConf.gradioUrl && - serviceConf.defaultMultimodalModel - ) - ) { - serviceRef.current = null; - setClientState(moeflowCompanionServiceState.disabled); - return; - } - try { - const client = await Client.connect(serviceConf.gradioUrl); - serviceRef.current = { - client, - multimodalTranslate, - serviceConf, - }; - setClientState(moeflowCompanionServiceState.connected); - released.then(() => client.close()); - } catch (e) { - debugLogger('error connecting', e, serviceConf.gradioUrl); - serviceRef.current = null; - setClientState(moeflowCompanionServiceState.disconnected); - } - }, - [serviceConf], - ); - return [clientState, serviceRef.current] as const; -} - -async function multimodalTranslate( - client: Client, - files: Blob[], - targetLang: string, - model: string, -): Promise { - // const uploadRes = await client.upload_files(hfSpaceUrl, files) - // files.forEach(file => formData.append('files[]', file)); - // debugLogger('Upload response:', uploadRes); - const predictRes = await client.predict( - '/multimodal_llm_translate_file_api', - { - gradio_temp_files: files, // uploadRes.files!.map(handle_file), - model, - target_language: targetLang, - }, - ); - const [{ files: translated }] = predictRes.data as MoeflowMultimodalResData; - - debugLogger('Predict response:', predictRes, translated); - return translated; -} -export interface TranslatedFile { - local_path: string; - image_w: number; - image_h: number; - text_blocks: Array<{ - left: number; - top: number; - right: number; - bottom: number; - source: string; - translated: string; - }>; -} -/** - * the type in gradio https://github.com/moeflow-com/manga-image-translator/blob/moeflow-companion-main/moeflow_companion/gradio/multimodal.py#L62 - */ -type MoeflowMultimodalResData = [{ files: TranslatedFile[] }]; diff --git a/src/services/labelplus_packager.ts b/src/services/labelplus_packager.ts deleted file mode 100644 index 5da4389..0000000 --- a/src/services/labelplus_packager.ts +++ /dev/null @@ -1,80 +0,0 @@ -import * as zip from '@zip.js/zip.js'; - -export interface LPLabel { - x: number; // normalized - y: number; // normalized - position_type: number; // int , always 1 ? - translation: string; // singleline -} - -export interface LPFile { - file_name: string; // img filename (basename) - labels: LPLabel[]; -} - -function serializeIntoLabelplusFormat(files: LPFile[]): string[] { - return files.flatMap((file) => [ - `>>>>[${file.file_name}]<<<<`, - ...file.labels.flatMap((l, labelIndex) => [ - `----[${labelIndex}]----[${l.x},${l.y},${l.position_type}]`, - l.translation, - ]), - ]); -} - -type LANG_CODE = 'ja' | 'en' | 'zh-CN' | 'zh-TW'; - -interface MoeflowProjectMeta { - name: string; - intro: string; - default_role: 'supporter'; - allow_apply_type: 3; - application_check_type: 1; - is_need_check_application: boolean; - // create_time: string; - // edit_time: string; - source_language: 'ja'; - // target_languages: LANG_CODE[]; - // output_id: string; - output_language: LANG_CODE; -} - -export interface MoeflowImageFile { - lp: LPFile; - image: Blob; -} - -/** - * see moeflow-backend "TeamProjectImportAPI" - * @return a zip file for importing into moeflow-backend - */ -export async function createMoeflowProjectZip( - meta: MoeflowProjectMeta, - files: MoeflowImageFile[], -): Promise { - const zipWriter = new zip.ZipWriter(new zip.BlobWriter('application/zip'), { - bufferedWrite: true, - level: 9, - }); - - { - const translationsTxt = - serializeIntoLabelplusFormat(files.map((f) => f.lp)).join('\n') + '\n'; - const blob = new Blob([translationsTxt], { type: 'text/plain' }); - await zipWriter.add('translations.txt', new zip.BlobReader(blob)); - } - - for (const f of files) { - await zipWriter.add( - `images/${f.lp.file_name}`, - new zip.BlobReader(f.image), - ); - } - - await zipWriter.add( - 'project.json', - new zip.TextReader(JSON.stringify(meta, null, 2)), - ); - - return zipWriter.close(); -} From 81678a382499efc88db8c7d1dbf988f1edcd57ae Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Tue, 2 Sep 2025 02:49:22 +0900 Subject: [PATCH 03/13] wip --- ...iTranslate.tsx => BatchTranslateModal.tsx} | 95 ++------- src/components/ai/ModelConfigForm.tsx | 182 ++++++++++++++++++ src/components/ai/index.tsx | 68 +++++++ src/services/ai/multimodal_recognize.ts | 78 +++++--- 4 files changed, 317 insertions(+), 106 deletions(-) rename src/components/ai/{FileListAiTranslate.tsx => BatchTranslateModal.tsx} (74%) create mode 100644 src/components/ai/ModelConfigForm.tsx create mode 100644 src/components/ai/index.tsx diff --git a/src/components/ai/FileListAiTranslate.tsx b/src/components/ai/BatchTranslateModal.tsx similarity index 74% rename from src/components/ai/FileListAiTranslate.tsx rename to src/components/ai/BatchTranslateModal.tsx index 42bbad8..964842c 100644 --- a/src/components/ai/FileListAiTranslate.tsx +++ b/src/components/ai/BatchTranslateModal.tsx @@ -1,77 +1,22 @@ -import { Modal } from 'antd'; -import { FC, File as MFile, Target } from '@/interfaces'; -import { - useMoeflowCompanion, - moeflowCompanionServiceState, - MoeflowCompanionService, - TranslatedFile, -} from '@/services/ai/use_moeflow_companion'; -import { useAsyncEffect } from '@jokester/ts-commonutil/lib/react/hook/use-async-effect'; -import { createDebugLogger } from '@/utils/debug-logger'; -import { api, resultTypes } from '@/apis'; +import { FC } from 'react'; +import { File as MFile } from '@/interfaces'; +import { Target } from '@/interfaces'; import { useIntl } from 'react-intl'; -import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; import { useState } from 'react'; import { ResourcePool } from '@jokester/ts-commonutil/lib/concurrency/resource-pool-basic'; import { getCancelToken } from '@/utils/api'; +import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; +import { useAsyncEffect } from '@jokester/ts-commonutil/lib/react/hook/use-async-effect'; +import { createDebugLogger } from '@/utils/debug-logger'; +import { api, resultTypes } from '@/apis'; import { toLowerCamelCase } from '@/utils'; +import { + multimodalPresets, + recognizeFile, +} from '@/services/ai/multimodal_recognize'; +import { ModalHandle } from '.'; -const debugLogger = createDebugLogger('components:project:FileListAiTranslate'); - -type ModalHandle = ReturnType; - -interface TranslatorFunc { - (files: MFile[], target: Target): void; -} -function openTranslateModal( - files: MFile[], - target: Target, - service: MoeflowCompanionService, - modal: ModalStaticFunctions, -) { - const handle = modal.confirm({ - content: ( - handle} - /> - ), - okButtonProps: { disabled: true }, - onOk: () => { - console.log('ok'); - }, - onCancel: () => { - console.log('cancel'); - }, - }); -} - -export function useMoeflowCompanionAiTranslate(): - | [true, TranslatorFunc, React.ReactNode] - | [false, null, null] { - const [serviceState, service] = useMoeflowCompanion(); - const [modal, contextHolder] = Modal.useModal(); - - debugLogger('service', serviceState, service); - if (serviceState !== moeflowCompanionServiceState.connected) { - return [false, null, null]; - } - - return [ - true, - (files, target) => - openTranslateModal( - files, - target, - service!, - modal as ModalStaticFunctions, - ), - contextHolder, - ]; -} - +const debugLogger = createDebugLogger('components:ai:BatchTranslateModal'); interface TranslateTaskState { file: MFile; status: string; @@ -81,21 +26,17 @@ function clipTo01(x: number) { return Math.max(0, Math.min(1, x)); } -const ModalContent: FC<{ - service: MoeflowCompanionService; +export const BatchTranslateModalContent: FC<{ files: MFile[]; target: Target; getHandle(): ModalHandle; -}> = ({ - service: { client, serviceConf, multimodalTranslate }, - files, - target, - getHandle, -}) => { +}> = ({ files, target, getHandle }) => { const intl = useIntl(); const [fileStates, setFileStates] = useState(() => files.map((file) => ({ file, status: 'waiting' })), ); + + async function startWork() {} useAsyncEffect(async (running, released) => { const [cancelToken, fillCancelToken] = getCancelToken(); const fileLimiter = ResourcePool.multiple([1, 2]); @@ -233,3 +174,5 @@ const ModalContent: FC<{ ); }; + +const WorkModalContent: FC<{}> = (props) => {}; diff --git a/src/components/ai/ModelConfigForm.tsx b/src/components/ai/ModelConfigForm.tsx new file mode 100644 index 0000000..a87adba --- /dev/null +++ b/src/components/ai/ModelConfigForm.tsx @@ -0,0 +1,182 @@ +import React, { useEffect } from 'react'; +import { + Form, + Input, + Select, + Button, + Card, + Space, + Divider, + Typography, + message, +} from 'antd'; +import { PlusOutlined, DeleteOutlined, SaveOutlined } from '@ant-design/icons'; + +const { Option } = Select; +const { Title, Text } = Typography; + +interface MultimodalModelConf { + provider: string; + model: string; + baseUrl: string; +} + +interface ModelConfigFormProps { + initialValue?: MultimodalModelConf; + onSave?: (config: MultimodalModelConf) => void; + onCancel?: () => void; + loading?: boolean; +} + +export const ModelConfigForm: React.FC = ({ + initialValue, + onSave, + onCancel, + loading = false, +}) => { + const [form] = Form.useForm(); + + useEffect(() => { + if (initialValue) { + form.setFieldsValue(initialValue); + } + }, [initialValue, form]); + + const handleSubmit = async (values: MultimodalModelConf) => { + try { + if (onSave) { + await onSave(values); + message.success('Configuration saved successfully'); + } + } catch (error) { + message.error('Failed to save configuration'); + console.error('Save error:', error); + } + }; + + const handleCancel = () => { + if (onCancel) { + onCancel(); + } + }; + + const handleReset = () => { + form.resetFields(); + if (initialValue) { + form.setFieldsValue(initialValue); + } + }; + + return ( + +
+ + + + + + + + + + + + + + + + + + + +
+ + + +
+ Preset Configurations + + Common configurations for popular AI providers: + + +
+ Google Gemini: +
+
• Provider: gemini
+
• Model: gemini-2.5-flash, gemini-2.5-pro
+
+ • Base URL: + https://generativelanguage.googleapis.com/v1beta/openai/ +
+
+
+ +
+ OpenAI: +
+
• Provider: openai
+
• Model: gpt-4-vision-preview, gpt-4o
+
• Base URL: https://api.openai.com/v1/
+
+
+ +
+ Anthropic: +
+
• Provider: anthropic
+
• Model: claude-3-5-sonnet, claude-3-opus
+
• Base URL: https://api.anthropic.com/v1/
+
+
+
+
+ ); +}; + +ModelConfigForm; diff --git a/src/components/ai/index.tsx b/src/components/ai/index.tsx new file mode 100644 index 0000000..b9495ce --- /dev/null +++ b/src/components/ai/index.tsx @@ -0,0 +1,68 @@ +import { Modal } from 'antd'; +import { File as MFile, Target } from '@/interfaces'; +import { createDebugLogger } from '@/utils/debug-logger'; +import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; + +import { ModelConfigForm } from './ModelConfigForm'; +import { BatchTranslateModalContent } from './BatchTranslateModal'; + +const debugLogger = createDebugLogger('components:project:FileListAiTranslate'); + +export type ModalHandle = ReturnType; + +interface TranslatorFunc { + (files: MFile[], target: Target, onSaved?: (f: MFile) => void): void; +} +async function openTranslateModal( + files: MFile[], + target: Target, + modal: ModalStaticFunctions, +) { + const modelConfigured = await new Promise((resolve, reject) => { + const handle = modal.confirm({ + content: , + okText: `start auto translate`, + onOk: () => { + resolve(true); + }, + onCancel: () => { + resolve(false); + }, + }); + }); + if (!modelConfigured) { + return; + } + + const f = await new Promise((resolove, reject) => { + const handle = modal.confirm({ + content: ( + handle as ModalHandle} + /> + ), + okButtonProps: { disabled: true }, + onOk: () => { + console.log('ok'); + }, + onCancel: () => { + console.log('cancel'); + }, + }); + }); +} + +export function useMoeflowCompanionAiTranslate(): + | [true, TranslatorFunc, React.ReactNode] + | [false, null, null] { + const [modal, contextHolder] = Modal.useModal(); + + return [ + true, + (files, target) => + openTranslateModal(files, target, modal as ModalStaticFunctions), + contextHolder, + ]; +} diff --git a/src/services/ai/multimodal_recognize.ts b/src/services/ai/multimodal_recognize.ts index 4de688a..9f69957 100644 --- a/src/services/ai/multimodal_recognize.ts +++ b/src/services/ai/multimodal_recognize.ts @@ -1,5 +1,5 @@ -import z from "zod"; -import { generateObject, GenerateObjectOptions, UserMessage} from 'xsai' +import z from 'zod'; +import { generateObject, GenerateObjectOptions, UserMessage } from 'xsai'; interface MultimodalModelConf { provider: string; @@ -23,35 +23,53 @@ export const multimodalPresets: readonly Readonly[] = [ ]; const fileRecognizeResultSchema = z.object({ - imageW: z.number({message: 'the width of the image in PX'}), - imageH: z.number({message: 'the height of the image in PX'}), - texts: z.array(z.object({ - left: z.number({message: 'left coordinate of the text in PX, in the whole image'}), - top: z.number({message: 'top coordinate of the text in PX, in the whole image'}), - width: z.number({message: 'width of the text in PX'}), - height: z.number({message: 'height of the text in PX'}), - textLines: z.array(z.string(), {message: 'the text lines'}), - text: z.string({message: 'concatencated text'}), - tranalated: z.string({message: 'translated text'}), - comment: z.string({message: 'additional comment of the text, or the translation'}), - })) -}) + imageW: z.number({ message: 'the width of the image in PX' }), + imageH: z.number({ message: 'the height of the image in PX' }), + texts: z.array( + z.object({ + left: z.number({ + message: 'left coordinate of the text in PX, in the whole image', + }), + top: z.number({ + message: 'top coordinate of the text in PX, in the whole image', + }), + width: z.number({ message: 'width of the text in PX' }), + height: z.number({ message: 'height of the text in PX' }), + textLines: z.array(z.string(), { message: 'the text lines' }), + text: z.string({ message: 'concatencated text' }), + tranalated: z.string({ message: 'translated text' }), + comment: z.string({ + message: 'additional comment of the text, or the translation', + }), + }), + ), +}); -export async function recognizeFile(apiKey: string, conf: MultimodalModelConf, msg: UserMessage, abortSignal?: AbortSignal): Promise> { - const generateConf: GenerateObjectOptions = { - messages: [ - { content: 'You are a helpful assistant. Please do as user instructs.', role: 'system' }, - msg, - ], - schema: fileRecognizeResultSchema, - baseURL: conf.baseUrl, - model: conf.model, - apiKey, - } +export type FileRecognizeResult = z.infer; + +export async function recognizeFile( + apiKey: string, + conf: MultimodalModelConf, + msg: UserMessage, + abortSignal?: AbortSignal, +): Promise> { + const generateConf: GenerateObjectOptions = + { + messages: [ + { + content: 'You are a helpful assistant. Please do as user instructs.', + role: 'system', + }, + msg, + ], + schema: fileRecognizeResultSchema, + baseURL: conf.baseUrl, + model: conf.model, + apiKey, + }; const res = await generateObject({ ...generateConf, - abortSignal - }) - return res.object + abortSignal, + }); + return res.object; } - From d4a7aa9f4ce26e1b1035a9de26d2035658326c0d Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 3 Sep 2025 00:32:20 +0900 Subject: [PATCH 04/13] wip --- src/components/ai/BatchTranslateModal.tsx | 10 +- src/components/ai/ModelConfigForm.tsx | 218 +++++++++--------- src/components/ai/index.tsx | 101 ++++---- src/components/project/FileList.tsx | 13 +- ...timodal_recognize.ts => llm_preprocess.ts} | 23 +- src/store/user/sagas.ts | 5 + 6 files changed, 199 insertions(+), 171 deletions(-) rename src/services/ai/{multimodal_recognize.ts => llm_preprocess.ts} (76%) diff --git a/src/components/ai/BatchTranslateModal.tsx b/src/components/ai/BatchTranslateModal.tsx index 964842c..0f2ba5f 100644 --- a/src/components/ai/BatchTranslateModal.tsx +++ b/src/components/ai/BatchTranslateModal.tsx @@ -5,15 +5,11 @@ import { useIntl } from 'react-intl'; import { useState } from 'react'; import { ResourcePool } from '@jokester/ts-commonutil/lib/concurrency/resource-pool-basic'; import { getCancelToken } from '@/utils/api'; -import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; import { useAsyncEffect } from '@jokester/ts-commonutil/lib/react/hook/use-async-effect'; import { createDebugLogger } from '@/utils/debug-logger'; import { api, resultTypes } from '@/apis'; import { toLowerCamelCase } from '@/utils'; -import { - multimodalPresets, - recognizeFile, -} from '@/services/ai/multimodal_recognize'; +import { llmPresets, llmPreprocessFile } from '@/services/ai/llm_preprocess'; import { ModalHandle } from '.'; const debugLogger = createDebugLogger('components:ai:BatchTranslateModal'); @@ -27,6 +23,8 @@ function clipTo01(x: number) { } export const BatchTranslateModalContent: FC<{ + modelConf: MultimodalModelConf; + files: MFile[]; target: Target; getHandle(): ModalHandle; @@ -96,7 +94,7 @@ export const BatchTranslateModalContent: FC<{ return; } - const result = await multimodalTranslate( + const result = await llmPreprocessFile( client, [imgBlob], target.language.enName, diff --git a/src/components/ai/ModelConfigForm.tsx b/src/components/ai/ModelConfigForm.tsx index a87adba..b9aaa3e 100644 --- a/src/components/ai/ModelConfigForm.tsx +++ b/src/components/ai/ModelConfigForm.tsx @@ -10,87 +10,129 @@ import { Typography, message, } from 'antd'; -import { PlusOutlined, DeleteOutlined, SaveOutlined } from '@ant-design/icons'; +import { SaveOutlined } from '@ant-design/icons'; +import * as LlmService from '@/services/ai/llm_preprocess'; const { Option } = Select; const { Title, Text } = Typography; -interface MultimodalModelConf { - provider: string; - model: string; - baseUrl: string; -} - interface ModelConfigFormProps { - initialValue?: MultimodalModelConf; - onSave?: (config: MultimodalModelConf) => void; - onCancel?: () => void; - loading?: boolean; + initialValue?: LlmService.LLMConf; + onChange?: (config: LlmService.LLMConf) => void; } export const ModelConfigForm: React.FC = ({ initialValue, - onSave, - onCancel, - loading = false, + onChange, }) => { const [form] = Form.useForm(); + // Find matching preset index for initial value + const findPresetIndex = (config: LlmService.LLMConf): number => { + const index = LlmService.llmPresets.findIndex( + preset => + preset.model === config.model && + preset.baseUrl === config.baseUrl + ); + return index >= 0 ? index : -1; // -1 for custom + }; + useEffect(() => { if (initialValue) { - form.setFieldsValue(initialValue); + const presetIndex = findPresetIndex(initialValue); + form.setFieldsValue({ + preset: presetIndex, + model: initialValue.model, + baseUrl: initialValue.baseUrl, + apiKey: initialValue.apiKey, + }); } }, [initialValue, form]); - const handleSubmit = async (values: MultimodalModelConf) => { - try { - if (onSave) { - await onSave(values); - message.success('Configuration saved successfully'); - } - } catch (error) { - message.error('Failed to save configuration'); - console.error('Save error:', error); + // Handle preset selection change + const handlePresetChange = (presetIndex: number) => { + if (presetIndex >= 0 && presetIndex < LlmService.llmPresets.length) { + const preset = LlmService.llmPresets[presetIndex]; + form.setFieldsValue({ + model: preset.model, + baseUrl: preset.baseUrl, + }); } + // For custom preset (index -1), don't auto-fill fields }; - const handleCancel = () => { - if (onCancel) { - onCancel(); + // Handle form values change + const handleFormChange = (changedValues: any, allValues: any) => { + // Check if model or baseUrl was changed and update preset accordingly + if (changedValues.model !== undefined || changedValues.baseUrl !== undefined) { + const currentModel = allValues.model || changedValues.model; + const currentBaseUrl = allValues.baseUrl || changedValues.baseUrl; + + // Find matching preset + const matchingPresetIndex = LlmService.llmPresets.findIndex( + preset => preset.model === currentModel && preset.baseUrl === currentBaseUrl + ); + + // Update preset to match the current values + if (matchingPresetIndex >= 0) { + // Found a matching preset, switch to it + if (allValues.preset !== matchingPresetIndex) { + form.setFieldValue('preset', matchingPresetIndex); + } + } else { + // No preset matches, set to custom (-1) + if (allValues.preset !== -1) { + form.setFieldValue('preset', -1); + } + } } - }; - - const handleReset = () => { - form.resetFields(); - if (initialValue) { - form.setFieldsValue(initialValue); + + const values = form.getFieldsValue(); + if (values.model && values.baseUrl) { + // Get provider from selected preset if available + let provider = ''; + if (values.preset >= 0 && values.preset < LlmService.llmPresets.length) { + provider = LlmService.llmPresets[values.preset].provider; + } + + const config: LlmService.LLMConf = { + provider, + model: values.model, + baseUrl: values.baseUrl, + apiKey: values.apiKey, + }; + onChange?.(config); } }; - return ( - +
+ Configure LLM Model +

+ Please provide the LLM API configuration used to translate the images. +

+

+ The LLM API should use the OpenAI-compatible format and API key authencation. The model + should support image input and structured output. +

+

This configuration is only used and saved inside in your browser.

- - + {LlmService.llmPresets.map((preset, i) => ( + + ))} + @@ -101,13 +143,13 @@ export const ModelConfigForm: React.FC = ({ > = ({ > - - - - - - + + - -
- Preset Configurations - - Common configurations for popular AI providers: - - -
- Google Gemini: -
-
• Provider: gemini
-
• Model: gemini-2.5-flash, gemini-2.5-pro
-
- • Base URL: - https://generativelanguage.googleapis.com/v1beta/openai/ -
-
-
- -
- OpenAI: -
-
• Provider: openai
-
• Model: gpt-4-vision-preview, gpt-4o
-
• Base URL: https://api.openai.com/v1/
-
-
- -
- Anthropic: -
-
• Provider: anthropic
-
• Model: claude-3-5-sonnet, claude-3-opus
-
• Base URL: https://api.anthropic.com/v1/
-
-
-
- +
); }; -ModelConfigForm; diff --git a/src/components/ai/index.tsx b/src/components/ai/index.tsx index b9495ce..a31cb61 100644 --- a/src/components/ai/index.tsx +++ b/src/components/ai/index.tsx @@ -5,64 +5,77 @@ import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; import { ModelConfigForm } from './ModelConfigForm'; import { BatchTranslateModalContent } from './BatchTranslateModal'; +import { useCallback, useMemo } from 'react'; +import { LLMConf } from '@/services/ai/llm_preprocess'; const debugLogger = createDebugLogger('components:project:FileListAiTranslate'); export type ModalHandle = ReturnType; -interface TranslatorFunc { - (files: MFile[], target: Target, onSaved?: (f: MFile) => void): void; +interface TranslatorApi { + start( + onFileSaved: (f: MFile) => void, + onConfigured?: () => void, + ): Promise; } -async function openTranslateModal( +function bind( files: MFile[], target: Target, modal: ModalStaticFunctions, -) { - const modelConfigured = await new Promise((resolve, reject) => { - const handle = modal.confirm({ - content: , - okText: `start auto translate`, - onOk: () => { - resolve(true); - }, - onCancel: () => { - resolve(false); - }, +): TranslatorApi { + return { + start, + }; + async function start() { + const modelConfigured = await new Promise((resolve, reject) => { + const handle = modal.confirm({ + style: { width: 600 }, + icon: null, + content: , + okText: `start auto translate`, + onOk: () => { + resolve(true); + }, + onCancel: () => { + resolve(false); + }, + }); }); - }); - if (!modelConfigured) { - return; - } + if (!modelConfigured) { + return; + } - const f = await new Promise((resolove, reject) => { - const handle = modal.confirm({ - content: ( - handle as ModalHandle} - /> - ), - okButtonProps: { disabled: true }, - onOk: () => { - console.log('ok'); - }, - onCancel: () => { - console.log('cancel'); - }, + const f = await new Promise((resolove, reject) => { + const handle = modal.confirm({ + content: ( + handle as ModalHandle} + /> + ), + okButtonProps: { disabled: true }, + onOk: () => { + console.log('ok'); + }, + onCancel: () => { + console.log('cancel'); + }, + }); }); - }); + } } -export function useMoeflowCompanionAiTranslate(): - | [true, TranslatorFunc, React.ReactNode] - | [false, null, null] { +export function useAiTranslate( + files: MFile[], + target: Target, +): [true, TranslatorApi, React.ReactNode] | [false, null, null] { const [modal, contextHolder] = Modal.useModal(); - return [ - true, - (files, target) => - openTranslateModal(files, target, modal as ModalStaticFunctions), - contextHolder, - ]; + const api = useMemo( + () => bind(files, target, modal as ModalStaticFunctions), + [files, target, modal], + ); + + return [true, api, contextHolder]; } diff --git a/src/components/project/FileList.tsx b/src/components/project/FileList.tsx index 0cedc7a..a8e74ee 100644 --- a/src/components/project/FileList.tsx +++ b/src/components/project/FileList.tsx @@ -29,6 +29,7 @@ import { routes } from '@/pages/routes'; import { ListPageSpec } from '@/components/shared/List'; import { FilePondFile } from 'filepond'; import { createDebugLogger } from '@/utils/debug-logger'; +import { useAiTranslate } from '@/components/ai'; /** 文件列表的属性接口 */ interface FileListProps { @@ -70,6 +71,10 @@ export const FileList: FC = ({ const [spinningIDs, setSpinningIDs] = useState([]); // 删除请求中 const filePondRef = useRef(); const currentPageSpecRef = useRef(null); + const [aiEnabled, aiTranslateApi, aiModalHolder] = useAiTranslate( + items, + target, + ); const defaultPage = useSelector( (state: AppState) => state.file.filesState.page, @@ -378,12 +383,17 @@ export const FileList: FC = ({ ? formatMessage({ id: 'project.changeTarget' }) + ' - ' : '') + target?.language.i18nName} - {false && ( + {aiEnabled && aiTranslateApi && ( @@ -579,6 +589,7 @@ export const FileList: FC = ({ selectedFileIds={selectedFileIds} /> + {aiModalHolder} ); }; diff --git a/src/services/ai/multimodal_recognize.ts b/src/services/ai/llm_preprocess.ts similarity index 76% rename from src/services/ai/multimodal_recognize.ts rename to src/services/ai/llm_preprocess.ts index 9f69957..ff06a28 100644 --- a/src/services/ai/multimodal_recognize.ts +++ b/src/services/ai/llm_preprocess.ts @@ -1,28 +1,29 @@ import z from 'zod'; import { generateObject, GenerateObjectOptions, UserMessage } from 'xsai'; -interface MultimodalModelConf { +export interface LLMConf { provider: string; model: string; baseUrl: string; + apiKey?: string; } -export const multimodalPresets: readonly Readonly[] = [ +export const llmPresets: readonly Readonly[] = [ // gemini: // see https://ai.google.dev/gemini-api/docs/openai { - provider: 'gemini', + provider: 'Google', model: 'gemini-2.5-flash', baseUrl: 'https://generativelanguage.googleapis.com/v1beta/openai/', }, { - provider: 'gemini', + provider: 'Google', model: 'gemini-2.5-pro', baseUrl: 'https://generativelanguage.googleapis.com/v1beta/openai/', }, ]; -const fileRecognizeResultSchema = z.object({ +const FilePreprocessResultSchema = z.object({ imageW: z.number({ message: 'the width of the image in PX' }), imageH: z.number({ message: 'the height of the image in PX' }), texts: z.array( @@ -45,15 +46,15 @@ const fileRecognizeResultSchema = z.object({ ), }); -export type FileRecognizeResult = z.infer; +export type FilePreprocessResult = z.infer; -export async function recognizeFile( +export async function llmPreprocessFile( apiKey: string, - conf: MultimodalModelConf, + conf: LLMConf, msg: UserMessage, abortSignal?: AbortSignal, -): Promise> { - const generateConf: GenerateObjectOptions = +): Promise> { + const generateConf: GenerateObjectOptions = { messages: [ { @@ -62,7 +63,7 @@ export async function recognizeFile( }, msg, ], - schema: fileRecognizeResultSchema, + schema: FilePreprocessResultSchema, baseURL: conf.baseUrl, model: conf.model, apiKey, diff --git a/src/store/user/sagas.ts b/src/store/user/sagas.ts index 105a062..45e1319 100644 --- a/src/store/user/sagas.ts +++ b/src/store/user/sagas.ts @@ -11,6 +11,11 @@ function* getUserInfoAsync(action: ReturnType) { const token = action.payload.token; const instance: Axios = yield api.getAxiosInstance(); if (token === '') { + if (process.env.NODE_ENV === 'development') { + // do nothing in dev: vite hot reloading may create APIClient multiple times, + // causing 401 and an empty token being set + return; + } // 清除 Axios Authorization 头 delete instance.defaults.headers.common['Authorization']; // 清除 Cookie token From 595ce0e97e09c10bc1fa2bd4e6442c097e69a93f Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 3 Sep 2025 00:46:01 +0900 Subject: [PATCH 05/13] get model config modal working --- src/components/ai/ModelConfigForm.tsx | 94 +++++++++++++-------------- src/components/ai/index.tsx | 46 +++++++++---- src/components/project/FileList.tsx | 1 - 3 files changed, 76 insertions(+), 65 deletions(-) diff --git a/src/components/ai/ModelConfigForm.tsx b/src/components/ai/ModelConfigForm.tsx index b9aaa3e..87698b6 100644 --- a/src/components/ai/ModelConfigForm.tsx +++ b/src/components/ai/ModelConfigForm.tsx @@ -30,9 +30,8 @@ export const ModelConfigForm: React.FC = ({ // Find matching preset index for initial value const findPresetIndex = (config: LlmService.LLMConf): number => { const index = LlmService.llmPresets.findIndex( - preset => - preset.model === config.model && - preset.baseUrl === config.baseUrl + (preset) => + preset.model === config.model && preset.baseUrl === config.baseUrl, ); return index >= 0 ? index : -1; // -1 for custom }; @@ -64,45 +63,47 @@ export const ModelConfigForm: React.FC = ({ // Handle form values change const handleFormChange = (changedValues: any, allValues: any) => { // Check if model or baseUrl was changed and update preset accordingly - if (changedValues.model !== undefined || changedValues.baseUrl !== undefined) { + if ( + changedValues.model !== undefined || + changedValues.baseUrl !== undefined + ) { const currentModel = allValues.model || changedValues.model; const currentBaseUrl = allValues.baseUrl || changedValues.baseUrl; - - // Find matching preset - const matchingPresetIndex = LlmService.llmPresets.findIndex( - preset => preset.model === currentModel && preset.baseUrl === currentBaseUrl - ); - - // Update preset to match the current values - if (matchingPresetIndex >= 0) { - // Found a matching preset, switch to it - if (allValues.preset !== matchingPresetIndex) { - form.setFieldValue('preset', matchingPresetIndex); - } - } else { - // No preset matches, set to custom (-1) - if (allValues.preset !== -1) { - form.setFieldValue('preset', -1); - } + + // Find matching preset + const matchingPresetIndex = LlmService.llmPresets.findIndex( + (preset) => + preset.model === currentModel && preset.baseUrl === currentBaseUrl, + ); + + // Update preset to match the current values + if (matchingPresetIndex >= 0) { + // Found a matching preset, switch to it + if (allValues.preset !== matchingPresetIndex) { + form.setFieldValue('preset', matchingPresetIndex); + } + } else { + // No preset matches, set to custom (-1) + if (allValues.preset !== -1) { + form.setFieldValue('preset', -1); } + } } - + const values = form.getFieldsValue(); - if (values.model && values.baseUrl) { - // Get provider from selected preset if available - let provider = ''; - if (values.preset >= 0 && values.preset < LlmService.llmPresets.length) { - provider = LlmService.llmPresets[values.preset].provider; - } - - const config: LlmService.LLMConf = { - provider, - model: values.model, - baseUrl: values.baseUrl, - apiKey: values.apiKey, - }; - onChange?.(config); + // Get provider from selected preset if available + let provider = ''; + if (values.preset >= 0 && values.preset < LlmService.llmPresets.length) { + provider = LlmService.llmPresets[values.preset].provider; } + + const config: LlmService.LLMConf = { + provider, + model: values.model, + baseUrl: values.baseUrl, + apiKey: values.apiKey, + }; + onChange?.(config); }; return (
@@ -111,17 +112,14 @@ export const ModelConfigForm: React.FC = ({ Please provide the LLM API configuration used to translate the images.

- The LLM API should use the OpenAI-compatible format and API key authencation. The model - should support image input and structured output. + The LLM API should use the OpenAI-compatible format and API key + authencation. The model should support image input and structured + output.

This configuration is only used and saved inside in your browser.

-
- - @@ -168,10 +166,7 @@ export const ModelConfigForm: React.FC = ({ name="apiKey" rules={[{ required: true, message: 'Please enter your API key' }]} > - +
@@ -179,4 +174,3 @@ export const ModelConfigForm: React.FC = ({
); }; - diff --git a/src/components/ai/index.tsx b/src/components/ai/index.tsx index a31cb61..790c2e2 100644 --- a/src/components/ai/index.tsx +++ b/src/components/ai/index.tsx @@ -17,6 +17,7 @@ interface TranslatorApi { onFileSaved: (f: MFile) => void, onConfigured?: () => void, ): Promise; + testModel(modelConf: LLMConf): Promise<{ worked: boolean; message: string }>; } function bind( files: MFile[], @@ -25,22 +26,39 @@ function bind( ): TranslatorApi { return { start, + testModel, }; + async function testModel( + modelConf: LLMConf, + ): Promise<{ worked: boolean; message: string }> { + return { worked: true, message: 'test model worked' }; + } + async function start() { - const modelConfigured = await new Promise((resolve, reject) => { - const handle = modal.confirm({ - style: { width: 600 }, - icon: null, - content: , - okText: `start auto translate`, - onOk: () => { - resolve(true); - }, - onCancel: () => { - resolve(false); - }, - }); - }); + const modelConfigured = await new Promise( + (resolve, reject) => { + let modelConf: LLMConf | null = null; + const onChange = (conf: LLMConf) => { + debugLogger('model configured', conf); + modelConf = conf; + if (modelConf.model && modelConf.baseUrl && modelConf.apiKey) { + handle.update({okButtonProps: {}}); + } + }; + const handle = modal.confirm({ + icon: null, + content: , + okText: `Start translate`, + okButtonProps: { disabled: true }, + onOk: () => { + resolve(modelConf); + }, + onCancel: () => { + resolve(null); + }, + }); + }, + ); if (!modelConfigured) { return; } diff --git a/src/components/project/FileList.tsx b/src/components/project/FileList.tsx index a8e74ee..ac5d658 100644 --- a/src/components/project/FileList.tsx +++ b/src/components/project/FileList.tsx @@ -65,7 +65,6 @@ export const FileList: FC = ({ const [outputDrawerVisible, setOutputDrawerVisible] = useState(false); const coverWidth = IMAGE_COVER.WIDTH; const coverHeight = IMAGE_COVER.HEIGHT; - // const [aiTranslateAvailable, startAiTranslate, modalContextHolder] = useMoeflowCompanionAiTranslate(); const [items, setItems] = useState([]); const [spinningIDs, setSpinningIDs] = useState([]); // 删除请求中 From 8ae7208e77d1bb9ed82befd3c5e9f5a6d06a836d Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 3 Sep 2025 01:13:00 +0900 Subject: [PATCH 06/13] connect new flow to UI --- src/components/ai/BatchTranslateModal.tsx | 82 ++++++++++++++--------- src/components/ai/index.tsx | 59 +++++++--------- src/components/project/FileList.tsx | 10 +-- src/services/ai/llm_preprocess.ts | 12 +++- 4 files changed, 91 insertions(+), 72 deletions(-) diff --git a/src/components/ai/BatchTranslateModal.tsx b/src/components/ai/BatchTranslateModal.tsx index 0f2ba5f..b93fb38 100644 --- a/src/components/ai/BatchTranslateModal.tsx +++ b/src/components/ai/BatchTranslateModal.tsx @@ -9,8 +9,14 @@ import { useAsyncEffect } from '@jokester/ts-commonutil/lib/react/hook/use-async import { createDebugLogger } from '@/utils/debug-logger'; import { api, resultTypes } from '@/apis'; import { toLowerCamelCase } from '@/utils'; -import { llmPresets, llmPreprocessFile } from '@/services/ai/llm_preprocess'; +import { + testModel, + llmPreprocessFile, + LLMConf, + FilePreprocessResult, +} from '@/services/ai/llm_preprocess'; import { ModalHandle } from '.'; +import { UserMessage } from 'xsai'; const debugLogger = createDebugLogger('components:ai:BatchTranslateModal'); interface TranslateTaskState { @@ -23,12 +29,12 @@ function clipTo01(x: number) { } export const BatchTranslateModalContent: FC<{ - modelConf: MultimodalModelConf; + llmConf: LLMConf; files: MFile[]; target: Target; getHandle(): ModalHandle; -}> = ({ files, target, getHandle }) => { +}> = ({ files, target, getHandle, llmConf }) => { const intl = useIntl(); const [fileStates, setFileStates] = useState(() => files.map((file) => ({ file, status: 'waiting' })), @@ -83,9 +89,7 @@ export const BatchTranslateModalContent: FC<{ if (resData.sourceCount) { setFileState(f, 'skip: source count not 0'); } - const imgBlob = await fetch(resData.url!, { - // mode: 'no-cors', - }).then( + const imgBlob = await fetch(resData.url!, {}).then( (r) => r.blob(), () => null, ); @@ -94,21 +98,32 @@ export const BatchTranslateModalContent: FC<{ return; } - const result = await llmPreprocessFile( - client, - [imgBlob], - target.language.enName, - serviceConf!.defaultMultimodalModel!, - ).catch((e) => { - debugLogger('translate failed', e); - return []; - }); - debugLogger('translate result', result); + const userMessage: UserMessage = { + role: 'user', + content: [ + { + type: 'text', + text: `Please translate the image to ${target.language.enName}. ${llmConf.extraPrompt || ''}`, + }, + { + type: 'image_url', + image_url: { + url: await img2dataurl(imgBlob), + }, + }, + ], + }; - const [r] = result; + const result = await llmPreprocessFile(llmConf, userMessage).catch( + (e) => { + debugLogger('translate failed', e); + return null; + }, + ); + debugLogger('translate result', result); - if (r) { - await saveTranslations(f, r); + if (result) { + await saveTranslations(f, result); } else { setFileState(f, 'error: translate failed'); } @@ -116,15 +131,15 @@ export const BatchTranslateModalContent: FC<{ async function saveTextBlock( f: MFile, - tf: TranslatedFile, - tb: TranslatedFile['text_blocks'][number], + tf: FilePreprocessResult, + tb: FilePreprocessResult['texts'][number], ) { const src = await api.source.createSource({ fileID: f.id, data: { - x: clipTo01((tb.left + tb.right) / 2 / tf.image_w), - y: clipTo01((tb.top + tb.bottom) / 2 / tf.image_h), - content: tb.source, + x: clipTo01((tb.left + tb.width / 2) / tf.imageW), + y: clipTo01((tb.top + tb.height / 2) / tf.imageH), + content: tb.text, }, configs: { cancelToken }, }); @@ -138,21 +153,18 @@ export const BatchTranslateModalContent: FC<{ }); } - async function saveTranslations(f: MFile, r: TranslatedFile) { - if (r.text_blocks.length === 0) { + async function saveTranslations(f: MFile, r: FilePreprocessResult) { + if (r.texts.length === 0) { setFileState(f, 'done: no text blocks'); } setFileState(f, 'saving'); try { await Promise.all( - r.text_blocks.map((tb) => + r.texts.map((tb) => moeflowApiLimiter.use(() => saveTextBlock(f, r, tb)), ), ); - setFileState( - f, - `success: translated ${r.text_blocks.length} text marks`, - ); + setFileState(f, `success: translated ${r.texts.length} text marks`); } catch (e) { debugLogger('save text block failed', e); setFileState(f, 'save file failed'); @@ -173,4 +185,10 @@ export const BatchTranslateModalContent: FC<{ ); }; -const WorkModalContent: FC<{}> = (props) => {}; +async function img2dataurl(img: Blob) { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onloadend = () => resolve(reader.result as string); + reader.readAsDataURL(img); + }); +} diff --git a/src/components/ai/index.tsx b/src/components/ai/index.tsx index 790c2e2..53478a2 100644 --- a/src/components/ai/index.tsx +++ b/src/components/ai/index.tsx @@ -6,7 +6,7 @@ import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; import { ModelConfigForm } from './ModelConfigForm'; import { BatchTranslateModalContent } from './BatchTranslateModal'; import { useCallback, useMemo } from 'react'; -import { LLMConf } from '@/services/ai/llm_preprocess'; +import { LLMConf, testModel } from '@/services/ai/llm_preprocess'; const debugLogger = createDebugLogger('components:project:FileListAiTranslate'); @@ -28,38 +28,30 @@ function bind( start, testModel, }; - async function testModel( - modelConf: LLMConf, - ): Promise<{ worked: boolean; message: string }> { - return { worked: true, message: 'test model worked' }; - } - async function start() { - const modelConfigured = await new Promise( - (resolve, reject) => { - let modelConf: LLMConf | null = null; - const onChange = (conf: LLMConf) => { - debugLogger('model configured', conf); - modelConf = conf; - if (modelConf.model && modelConf.baseUrl && modelConf.apiKey) { - handle.update({okButtonProps: {}}); - } - }; - const handle = modal.confirm({ - icon: null, - content: , - okText: `Start translate`, - okButtonProps: { disabled: true }, - onOk: () => { - resolve(modelConf); - }, - onCancel: () => { - resolve(null); - }, - }); - }, - ); - if (!modelConfigured) { + const llmConf = await new Promise((resolve, reject) => { + let confValue: LLMConf | null = null; + const onChange = (conf: LLMConf) => { + debugLogger('model configured', conf); + confValue = conf; + if (confValue.model && confValue.baseUrl && confValue.apiKey) { + handle.update({ okButtonProps: {} }); + } + }; + const handle = modal.confirm({ + icon: null, + content: , + okText: `Start translate`, + okButtonProps: { disabled: true }, + onOk: () => { + resolve(confValue); + }, + onCancel: () => { + resolve(null); + }, + }); + }); + if (!llmConf) { return; } @@ -67,6 +59,7 @@ function bind( const handle = modal.confirm({ content: ( handle as ModalHandle} @@ -92,7 +85,7 @@ export function useAiTranslate( const api = useMemo( () => bind(files, target, modal as ModalStaticFunctions), - [files, target, modal], + [target.id, files.map((file) => file.id).join('|')], ); return [true, api, contextHolder]; diff --git a/src/components/project/FileList.tsx b/src/components/project/FileList.tsx index ac5d658..d3715ae 100644 --- a/src/components/project/FileList.tsx +++ b/src/components/project/FileList.tsx @@ -70,10 +70,6 @@ export const FileList: FC = ({ const [spinningIDs, setSpinningIDs] = useState([]); // 删除请求中 const filePondRef = useRef(); const currentPageSpecRef = useRef(null); - const [aiEnabled, aiTranslateApi, aiModalHolder] = useAiTranslate( - items, - target, - ); const defaultPage = useSelector( (state: AppState) => state.file.filesState.page, @@ -87,6 +83,12 @@ export const FileList: FC = ({ const selectedFileIds = useSelector( (state: AppState) => state.file.filesState.selectedFileIds, ); + const [aiEnabled, aiTranslateApi, aiModalHolder] = useAiTranslate( + [...new Set(selectedFileIds)] + .map((id) => items.find((item) => item.id === id)) + .filter(Boolean) as MFile[], + target, + ); const openInTranslator = (file: MFile) => { history.push(routes.imageTranslator.build(file.id, target.id)); diff --git a/src/services/ai/llm_preprocess.ts b/src/services/ai/llm_preprocess.ts index ff06a28..9a02a33 100644 --- a/src/services/ai/llm_preprocess.ts +++ b/src/services/ai/llm_preprocess.ts @@ -6,6 +6,7 @@ export interface LLMConf { model: string; baseUrl: string; apiKey?: string; + extraPrompt?: string; } export const llmPresets: readonly Readonly[] = [ @@ -38,7 +39,7 @@ const FilePreprocessResultSchema = z.object({ height: z.number({ message: 'height of the text in PX' }), textLines: z.array(z.string(), { message: 'the text lines' }), text: z.string({ message: 'concatencated text' }), - tranalated: z.string({ message: 'translated text' }), + translated: z.string({ message: 'translated text' }), comment: z.string({ message: 'additional comment of the text, or the translation', }), @@ -48,8 +49,13 @@ const FilePreprocessResultSchema = z.object({ export type FilePreprocessResult = z.infer; +export async function testModel( + modelConf: LLMConf, +): Promise<{ worked: boolean; message: string }> { + return { worked: true, message: 'test model worked' }; +} + export async function llmPreprocessFile( - apiKey: string, conf: LLMConf, msg: UserMessage, abortSignal?: AbortSignal, @@ -66,7 +72,7 @@ export async function llmPreprocessFile( schema: FilePreprocessResultSchema, baseURL: conf.baseUrl, model: conf.model, - apiKey, + apiKey: conf.apiKey, }; const res = await generateObject({ ...generateConf, From cc95f0cad0f44dce49cbdc9c9c347eeea15d8f3e Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 10 Sep 2025 02:14:39 +0900 Subject: [PATCH 07/13] fix --- src/components/ai/ModelConfigForm.tsx | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/components/ai/ModelConfigForm.tsx b/src/components/ai/ModelConfigForm.tsx index 87698b6..fc6b038 100644 --- a/src/components/ai/ModelConfigForm.tsx +++ b/src/components/ai/ModelConfigForm.tsx @@ -3,19 +3,11 @@ import { Form, Input, Select, - Button, - Card, - Space, Divider, Typography, - message, } from 'antd'; -import { SaveOutlined } from '@ant-design/icons'; import * as LlmService from '@/services/ai/llm_preprocess'; -const { Option } = Select; -const { Title, Text } = Typography; - interface ModelConfigFormProps { initialValue?: LlmService.LLMConf; onChange?: (config: LlmService.LLMConf) => void; @@ -52,10 +44,12 @@ export const ModelConfigForm: React.FC = ({ const handlePresetChange = (presetIndex: number) => { if (presetIndex >= 0 && presetIndex < LlmService.llmPresets.length) { const preset = LlmService.llmPresets[presetIndex]; - form.setFieldsValue({ + const patch = { model: preset.model, baseUrl: preset.baseUrl, - }); + } + form.setFieldsValue(patch); + handleFormChange(patch, form.getFieldsValue()); } // For custom preset (index -1), don't auto-fill fields }; @@ -107,7 +101,7 @@ export const ModelConfigForm: React.FC = ({ }; return (
- Configure LLM Model + Configure LLM Model

Please provide the LLM API configuration used to translate the images.

@@ -124,13 +118,13 @@ export const ModelConfigForm: React.FC = ({ onChange={handlePresetChange} > {LlmService.llmPresets.map((preset, i) => ( - + ))} - + @@ -141,7 +135,6 @@ export const ModelConfigForm: React.FC = ({ > From a5a1122ea015eee15b6fc1dd507f0db04ec4c5ec Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 10 Sep 2025 02:14:52 +0900 Subject: [PATCH 08/13] fix --- src/components/ai/index.tsx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/components/ai/index.tsx b/src/components/ai/index.tsx index 53478a2..8199395 100644 --- a/src/components/ai/index.tsx +++ b/src/components/ai/index.tsx @@ -5,7 +5,7 @@ import { ModalStaticFunctions } from 'antd/lib/modal/confirm'; import { ModelConfigForm } from './ModelConfigForm'; import { BatchTranslateModalContent } from './BatchTranslateModal'; -import { useCallback, useMemo } from 'react'; +import { useMemo } from 'react'; import { LLMConf, testModel } from '@/services/ai/llm_preprocess'; const debugLogger = createDebugLogger('components:project:FileListAiTranslate'); @@ -55,8 +55,9 @@ function bind( return; } - const f = await new Promise((resolove, reject) => { + const finished = await new Promise((resolve, reject) => { const handle = modal.confirm({ + icon: null, content: ( { - console.log('ok'); + resolve(true) }, onCancel: () => { - console.log('cancel'); + resolve(false) }, }); }); @@ -85,6 +86,7 @@ export function useAiTranslate( const api = useMemo( () => bind(files, target, modal as ModalStaticFunctions), + // eslint-disable-next-line react-hooks/exhaustive-deps [target.id, files.map((file) => file.id).join('|')], ); From 4ab443ac52d6e5459a4f3fbae721645981a341b9 Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 10 Sep 2025 02:15:14 +0900 Subject: [PATCH 09/13] workaround for gemini model --- src/services/ai/llm_preprocess.ts | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/services/ai/llm_preprocess.ts b/src/services/ai/llm_preprocess.ts index 9a02a33..d6e5da4 100644 --- a/src/services/ai/llm_preprocess.ts +++ b/src/services/ai/llm_preprocess.ts @@ -1,5 +1,8 @@ import z from 'zod'; import { generateObject, GenerateObjectOptions, UserMessage } from 'xsai'; +import { createDebugLogger } from '@/utils/debug-logger'; + +const debugLogger = createDebugLogger('services:ai:llm_preprocess'); export interface LLMConf { provider: string; @@ -78,5 +81,16 @@ export async function llmPreprocessFile( ...generateConf, abortSignal, }); + let ret = res.object; + if (conf.model?.startsWith('gemini-')) { + debugLogger('gemini workaround: set coords to 1000 scale'); + ret = { + ...ret, + // workaround: gemini returns coords in [0, 1000] scale + // see https://ai.google.dev/gemini-api/docs/image-understanding + imageH: 1000, + imageW: 1000, + } + } return res.object; } From 6061dd702d3920c16393a4a34dc9a0ceddfa513c Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 10 Sep 2025 02:40:23 +0900 Subject: [PATCH 10/13] revise translate modal --- src/components/ai/BatchTranslateModal.tsx | 75 +++++++++++++++-------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/src/components/ai/BatchTranslateModal.tsx b/src/components/ai/BatchTranslateModal.tsx index b93fb38..088fb76 100644 --- a/src/components/ai/BatchTranslateModal.tsx +++ b/src/components/ai/BatchTranslateModal.tsx @@ -10,37 +10,44 @@ import { createDebugLogger } from '@/utils/debug-logger'; import { api, resultTypes } from '@/apis'; import { toLowerCamelCase } from '@/utils'; import { - testModel, llmPreprocessFile, LLMConf, FilePreprocessResult, } from '@/services/ai/llm_preprocess'; import { ModalHandle } from '.'; import { UserMessage } from 'xsai'; +import { Icon } from '../icon'; const debugLogger = createDebugLogger('components:ai:BatchTranslateModal'); -interface TranslateTaskState { +interface FileProgress { file: MFile; - status: string; + icon: React.ReactNode | string + message?: React.ReactNode | string; } function clipTo01(x: number) { return Math.max(0, Math.min(1, x)); } +const stateIcons = { + waiting: , + working: , + skip: , + error: , + success: , +} as const + export const BatchTranslateModalContent: FC<{ llmConf: LLMConf; - files: MFile[]; target: Target; getHandle(): ModalHandle; }> = ({ files, target, getHandle, llmConf }) => { const intl = useIntl(); - const [fileStates, setFileStates] = useState(() => - files.map((file) => ({ file, status: 'waiting' })), + const [fileStates, setFileStates] = useState(() => + files.map((file): FileProgress => ({ file, icon: stateIcons.waiting, message: 'waiting' })), ); - async function startWork() {} useAsyncEffect(async (running, released) => { const [cancelToken, fillCancelToken] = getCancelToken(); const fileLimiter = ResourcePool.multiple([1, 2]); @@ -53,48 +60,54 @@ export const BatchTranslateModalContent: FC<{ debugLogger('canceled'); return; } - const tasksEnded = Promise.allSettled([ + released = released.then(() => { + debugLogger('released') + }) + const tasksEnded = Promise.allSettled( files.map((f, idx) => fileLimiter.use(() => translateFile(f, idx))), - ]); + ); const cancelled = await Promise.race([ released.then(() => true), tasksEnded.then(() => false), ]); + debugLogger('cancelled', cancelled); if (!cancelled) { const handle = getHandle(); handle.update({ okButtonProps: { disabled: false } }); } return; - function setFileState(f: MFile, status: string) { + function setFileState(f: MFile, message: string, icon: React.ReactNode) { + debugLogger('setFileState', f.id, message); setFileStates((prev) => - prev.map((state) => (state.file === f ? { ...state, status } : state)), + prev.map((state) => (state.file === f ? { ...state, message, icon } : state)), ); } async function translateFile(f: MFile, idx: number) { - setFileState(f, 'working'); + setFileState(f, 'working', stateIcons.working); if (![undefined, null, 'success'].includes(f.uploadState)) { - setFileState(f, 'skip: upload not finished'); + setFileState(f, 'skip: upload not finished', stateIcons.skip); return; } const refetchRes = await api.file - .getFile({ fileID: f.id }) + .getFile({ fileID: f.id, configs: { cancelToken } }) .catch(() => null); if (refetchRes?.type !== resultTypes.SUCCESS) { - setFileState(f, 'skip: fetch file failed'); + setFileState(f, 'skip: fetch file failed', stateIcons.error); return; } const resData = toLowerCamelCase(refetchRes.data); if (resData.sourceCount) { - setFileState(f, 'skip: source count not 0'); + setFileState(f, 'skip: already has source', stateIcons.skip); + return } - const imgBlob = await fetch(resData.url!, {}).then( + const imgBlob = await fetch(resData.url!, {signal: abort.signal }).then( (r) => r.blob(), () => null, ); if (!imgBlob) { - setFileState(f, 'skip: fetch image blob failed'); + setFileState(f, 'error: fetch image blob failed', stateIcons.error); return; } @@ -114,6 +127,8 @@ export const BatchTranslateModalContent: FC<{ ], }; + setFileState(f, 'translating', stateIcons.working); + const result = await llmPreprocessFile(llmConf, userMessage).catch( (e) => { debugLogger('translate failed', e); @@ -121,11 +136,14 @@ export const BatchTranslateModalContent: FC<{ }, ); debugLogger('translate result', result); + if (!running.current) { + return + } if (result) { await saveTranslations(f, result); } else { - setFileState(f, 'error: translate failed'); + setFileState(f, 'error: translate failed', stateIcons.error); } } @@ -149,35 +167,38 @@ export const BatchTranslateModalContent: FC<{ content: tb.translated, targetID: target.id, }, - configs: { cancelToken }, + // not using the cancel token, to make the saving operation closer to atomic + // configs: { cancelToken }, }); } async function saveTranslations(f: MFile, r: FilePreprocessResult) { if (r.texts.length === 0) { - setFileState(f, 'done: no text blocks'); + setFileState(f, 'done: no text blocks', stateIcons.skip); } - setFileState(f, 'saving'); + setFileState(f, 'saving', stateIcons.working); try { await Promise.all( r.texts.map((tb) => moeflowApiLimiter.use(() => saveTextBlock(f, r, tb)), ), ); - setFileState(f, `success: translated ${r.texts.length} text marks`); + setFileState(f, `success: recognized ${r.texts.length} text marks`, stateIcons.success); } catch (e) { debugLogger('save text block failed', e); - setFileState(f, 'save file failed'); + setFileState(f, 'save file failed', stateIcons.error); } } }, []); return (
- {files.length} files to translate +

Translating {files.length} files with LLM. Closing this dialog will stop translating.

    {fileStates.map((state) => ( -
  • - {state.file.name} - {state.status} +
  • + + {state.icon} + {state.file.name} - {state.message}
  • ))}
From 2097d5cbb48e7d9ffbad4801ef1c005007d8f096 Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 10 Sep 2025 02:50:26 +0900 Subject: [PATCH 11/13] fix zod model --- src/services/ai/llm_preprocess.ts | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/services/ai/llm_preprocess.ts b/src/services/ai/llm_preprocess.ts index d6e5da4..0296881 100644 --- a/src/services/ai/llm_preprocess.ts +++ b/src/services/ai/llm_preprocess.ts @@ -32,20 +32,20 @@ const FilePreprocessResultSchema = z.object({ imageH: z.number({ message: 'the height of the image in PX' }), texts: z.array( z.object({ - left: z.number({ - message: 'left coordinate of the text in PX, in the whole image', - }), - top: z.number({ - message: 'top coordinate of the text in PX, in the whole image', - }), - width: z.number({ message: 'width of the text in PX' }), - height: z.number({ message: 'height of the text in PX' }), - textLines: z.array(z.string(), { message: 'the text lines' }), - text: z.string({ message: 'concatencated text' }), - translated: z.string({ message: 'translated text' }), - comment: z.string({ - message: 'additional comment of the text, or the translation', - }), + left: z + .number() + .describe('left coordinate of the text in PX, in the whole image'), + top: z + .number() + .describe('top coordinate of the text in PX, in the whole image'), + width: z.number().describe('width of the text in PX'), + height: z.number().describe('height of the text in PX'), + textLines: z.array(z.string()).describe('the text lines'), + text: z.string().describe('concatenated text'), + translated: z.string().describe('translated text'), + comment: z + .string() + .describe('additional comment of the text, or the translation'), }), ), }); @@ -90,7 +90,7 @@ export async function llmPreprocessFile( // see https://ai.google.dev/gemini-api/docs/image-understanding imageH: 1000, imageW: 1000, - } + }; } return res.object; } From b3584648fe5fdbac3f2282e104a5a0bac664b3f9 Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Wed, 10 Sep 2025 02:51:05 +0900 Subject: [PATCH 12/13] format code --- src/components/ai/BatchTranslateModal.tsx | 42 +++++++++++++++-------- src/components/ai/ModelConfigForm.tsx | 10 ++---- src/components/ai/index.tsx | 6 ++-- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/components/ai/BatchTranslateModal.tsx b/src/components/ai/BatchTranslateModal.tsx index 088fb76..a4a9b92 100644 --- a/src/components/ai/BatchTranslateModal.tsx +++ b/src/components/ai/BatchTranslateModal.tsx @@ -21,7 +21,7 @@ import { Icon } from '../icon'; const debugLogger = createDebugLogger('components:ai:BatchTranslateModal'); interface FileProgress { file: MFile; - icon: React.ReactNode | string + icon: React.ReactNode | string; message?: React.ReactNode | string; } @@ -35,7 +35,7 @@ const stateIcons = { skip: , error: , success: , -} as const +} as const; export const BatchTranslateModalContent: FC<{ llmConf: LLMConf; @@ -45,7 +45,13 @@ export const BatchTranslateModalContent: FC<{ }> = ({ files, target, getHandle, llmConf }) => { const intl = useIntl(); const [fileStates, setFileStates] = useState(() => - files.map((file): FileProgress => ({ file, icon: stateIcons.waiting, message: 'waiting' })), + files.map( + (file): FileProgress => ({ + file, + icon: stateIcons.waiting, + message: 'waiting', + }), + ), ); useAsyncEffect(async (running, released) => { @@ -61,8 +67,8 @@ export const BatchTranslateModalContent: FC<{ return; } released = released.then(() => { - debugLogger('released') - }) + debugLogger('released'); + }); const tasksEnded = Promise.allSettled( files.map((f, idx) => fileLimiter.use(() => translateFile(f, idx))), ); @@ -80,7 +86,9 @@ export const BatchTranslateModalContent: FC<{ function setFileState(f: MFile, message: string, icon: React.ReactNode) { debugLogger('setFileState', f.id, message); setFileStates((prev) => - prev.map((state) => (state.file === f ? { ...state, message, icon } : state)), + prev.map((state) => + state.file === f ? { ...state, message, icon } : state, + ), ); } @@ -100,9 +108,9 @@ export const BatchTranslateModalContent: FC<{ const resData = toLowerCamelCase(refetchRes.data); if (resData.sourceCount) { setFileState(f, 'skip: already has source', stateIcons.skip); - return + return; } - const imgBlob = await fetch(resData.url!, {signal: abort.signal }).then( + const imgBlob = await fetch(resData.url!, { signal: abort.signal }).then( (r) => r.blob(), () => null, ); @@ -137,7 +145,7 @@ export const BatchTranslateModalContent: FC<{ ); debugLogger('translate result', result); if (!running.current) { - return + return; } if (result) { @@ -183,7 +191,11 @@ export const BatchTranslateModalContent: FC<{ moeflowApiLimiter.use(() => saveTextBlock(f, r, tb)), ), ); - setFileState(f, `success: recognized ${r.texts.length} text marks`, stateIcons.success); + setFileState( + f, + `success: recognized ${r.texts.length} text marks`, + stateIcons.success, + ); } catch (e) { debugLogger('save text block failed', e); setFileState(f, 'save file failed', stateIcons.error); @@ -192,12 +204,14 @@ export const BatchTranslateModalContent: FC<{ }, []); return (
-

Translating {files.length} files with LLM. Closing this dialog will stop translating.

+

+ Translating {files.length} files with LLM. Closing this dialog will stop + translating. +

    {fileStates.map((state) => ( -
  • - - {state.icon} +
  • + {state.icon} {state.file.name} - {state.message}
  • ))} diff --git a/src/components/ai/ModelConfigForm.tsx b/src/components/ai/ModelConfigForm.tsx index fc6b038..8cda57d 100644 --- a/src/components/ai/ModelConfigForm.tsx +++ b/src/components/ai/ModelConfigForm.tsx @@ -1,11 +1,5 @@ import React, { useEffect } from 'react'; -import { - Form, - Input, - Select, - Divider, - Typography, -} from 'antd'; +import { Form, Input, Select, Divider, Typography } from 'antd'; import * as LlmService from '@/services/ai/llm_preprocess'; interface ModelConfigFormProps { @@ -47,7 +41,7 @@ export const ModelConfigForm: React.FC = ({ const patch = { model: preset.model, baseUrl: preset.baseUrl, - } + }; form.setFieldsValue(patch); handleFormChange(patch, form.getFieldsValue()); } diff --git a/src/components/ai/index.tsx b/src/components/ai/index.tsx index 8199395..29cb9ee 100644 --- a/src/components/ai/index.tsx +++ b/src/components/ai/index.tsx @@ -55,7 +55,7 @@ function bind( return; } - const finished = await new Promise((resolve, reject) => { + const finished = await new Promise((resolve, reject) => { const handle = modal.confirm({ icon: null, content: ( @@ -68,10 +68,10 @@ function bind( ), okButtonProps: { disabled: true }, onOk: () => { - resolve(true) + resolve(true); }, onCancel: () => { - resolve(false) + resolve(false); }, }); }); From 34fc7ad19a7d4f8c672375f508487c29a8a86a10 Mon Sep 17 00:00:00 2001 From: Wang Guan Date: Tue, 9 Sep 2025 17:52:26 +0000 Subject: [PATCH 13/13] Apply suggestion from @coderabbitai[bot] Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/services/ai/llm_preprocess.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/ai/llm_preprocess.ts b/src/services/ai/llm_preprocess.ts index 0296881..fdea885 100644 --- a/src/services/ai/llm_preprocess.ts +++ b/src/services/ai/llm_preprocess.ts @@ -1,4 +1,4 @@ -import z from 'zod'; +import { z } from 'zod'; import { generateObject, GenerateObjectOptions, UserMessage } from 'xsai'; import { createDebugLogger } from '@/utils/debug-logger';