From 0228671a9cff76269715a84094ece646341b4724 Mon Sep 17 00:00:00 2001 From: Shiny-Sword Date: Fri, 17 Oct 2025 11:54:18 +0530 Subject: [PATCH] fix(image-tools): improve image URL handling and markdown formatting - Remove debug console log in socket event handler - Convert OpenAI image URLs to base64 format for better compatibility - Update image tool descriptions to include markdown format instructions - Replace MinIO URLs with internal endpoint for consistent image access --- nodejs/src/services/geminiImageTool.js | 14 +++++++++++--- nodejs/src/services/imageTool.js | 13 ++++++++++--- nodejs/src/services/langgraph.js | 17 +++++++++++------ nodejs/src/socket/rooms.js | 1 - 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/nodejs/src/services/geminiImageTool.js b/nodejs/src/services/geminiImageTool.js index e159f9c..df1590f 100644 --- a/nodejs/src/services/geminiImageTool.js +++ b/nodejs/src/services/geminiImageTool.js @@ -22,7 +22,7 @@ class CustomGeminiImageTool extends Tool { - Image Generation: "Create a photorealistic portrait of...", "Generate an image showing..." - Style Operations: "Create in watercolor style...", "Make it look futuristic..." - Scene Creation: "Show a landscape with...", "Create an interior scene..." - IMPORTANT: Always call this tool for image generation query, do not consider the message history. This tool automatically handles S3 uploads and returns S3 URLs for better user experience. Always use the S3 url returned from this tool in your output for displaying the generated image. DO NOT use this tool if the user requests to generate code based on an image input and a prompt. For such cases, use the chat tool to generate code from the image and prompt.`; + IMPORTANT: Always call this tool for image generation query, do not consider the message history. This tool automatically handles S3 uploads and returns S3 URLs for better user experience. Always use the S3 url returned from this tool in your output for displaying the generated image. DO NOT use this tool if the user requests to generate code based on an image input and a prompt. For such cases, use the chat tool to generate code from the image and prompt.After using this tool, always include the tool result (e.g. image URLs or Markdown) in your final response in this format ![Image Description](image_url).`; // Define the tool schema for function calling this.schema = { @@ -117,9 +117,17 @@ class CustomGeminiImageTool extends Tool { }); if (uploadResult.success) { + // Get the INTERNAL_ENDPOINT directly from environment variable + const internalEndpoint = process.env.INTERNAL_ENDPOINT; + // Only replace if it's a MinIO URL + let modifiedUrl = uploadResult.s3Url; + if (modifiedUrl.includes('http://minio:9000')) { + // Replace minio:9000 with the value from INTERNAL_ENDPOINT including http part + modifiedUrl = modifiedUrl.replace(/http:\/\/minio:9000/g, internalEndpoint); + } - // Return S3 URL in markdown format like imageTool.js - const s3Result = `![${query}](${uploadResult.s3Url})`; + // Return the image URL in markdown format + const s3Result = `![${query}](${modifiedUrl})`; return s3Result; } else { diff --git a/nodejs/src/services/imageTool.js b/nodejs/src/services/imageTool.js index 0994ba9..6a537a1 100644 --- a/nodejs/src/services/imageTool.js +++ b/nodejs/src/services/imageTool.js @@ -25,7 +25,7 @@ class CustomDallETool extends Tool { 1024x1024 (Square): Ideal for social media posts, profile pictures, digital artwork, and product images. 1024x1536 (Portrait): Perfect for mobile content, social media stories, and vertical ads. 1536x1024 (Landscape): Great for presentations, video thumbnails, website banners, and widescreen displays. - IMPORTANT: This tool automatically handles S3 uploads and returns S3 URLs for better user experience. DO NOT use this tool if the user requests to generate code based on an image input and a prompt. For such cases, use the chat tool to generate code from the image and prompt.`; + IMPORTANT: This tool automatically handles S3 uploads and returns S3 URLs for better user experience. DO NOT use this tool if the user requests to generate code based on an image input and a prompt. For such cases, use the chat tool to generate code from the image and prompt.After using this tool, always include the tool result (e.g. image URLs or Markdown) in your final response in this format ![Image Description](image_url).`; // Define the tool schema for function calling this.schema = { @@ -111,8 +111,15 @@ class CustomDallETool extends Tool { }); if (uploadResult.success) { - // Return S3 URL in markdown format - const s3Result = `![${query}](${uploadResult.s3Url})`; + // Get the INTERNAL_ENDPOINT directly from environment variable + const internalEndpoint = process.env.INTERNAL_ENDPOINT; + // Only replace if it's a MinIO URL + let modifiedUrl = uploadResult.s3Url; + if (modifiedUrl.includes('http://minio:9000')) { + // Replace minio:9000 with the value from INTERNAL_ENDPOINT including http part + modifiedUrl = modifiedUrl.replace(/http:\/\/minio:9000/g, internalEndpoint); + } + const s3Result = `![${query}](${modifiedUrl})`; return s3Result; } else { diff --git a/nodejs/src/services/langgraph.js b/nodejs/src/services/langgraph.js index 9f36186..e50384e 100644 --- a/nodejs/src/services/langgraph.js +++ b/nodejs/src/services/langgraph.js @@ -79,12 +79,18 @@ const MODEL_CONFIGS = { [AI_MODAL_PROVIDER.OPEN_AI]: { supportsVision: true, imageFormats: ['url'], - formatImage: (imageUrl) => ({ - type: 'image_url', - image_url: { - url: imageUrl + formatImage: async (imageUrl) => { + const result= await convertImageToBase64(imageUrl); + if (!result) { + return null; } - }) + return { + type: 'image_url', + image_url: { + url: `data:${result.mimeType};base64,${result.base64}` + } + }; + } }, [AI_MODAL_PROVIDER.ANTHROPIC]: { supportsVision: true, @@ -428,7 +434,6 @@ async function callModel(state, model, data, agentDetails = null) { content = '[Content parsing error]'; } }); - const response = await model.invoke(context); // Safe logging for response content diff --git a/nodejs/src/socket/rooms.js b/nodejs/src/socket/rooms.js index d965a43..11c64f1 100644 --- a/nodejs/src/socket/rooms.js +++ b/nodejs/src/socket/rooms.js @@ -162,7 +162,6 @@ sockets.on('connection', function (socket) { })); socket.on(SOCKET_EVENTS.LLM_RESPONSE_SEND, catchSocketAsync(async (data) => { - console.log("============LLM_RESPONSE_SEND============",data) await toolExecutor(data, socket); }));