diff --git a/core/package.json b/core/package.json index 37f2e8bd..334699db 100644 --- a/core/package.json +++ b/core/package.json @@ -59,6 +59,7 @@ "@opentelemetry/sdk-metrics": "^2.1.0", "@opentelemetry/sdk-trace-base": "^2.1.0", "@opentelemetry/sdk-trace-node": "^2.1.0", + "exa-js": "^2.12.1", "express": "^4.22.1", "google-auth-library": "^10.3.0", "js-yaml": "^4.1.1", diff --git a/core/src/common.ts b/core/src/common.ts index 13f53992..35b363cf 100644 --- a/core/src/common.ts +++ b/core/src/common.ts @@ -201,6 +201,16 @@ export type { } from './tools/base_tool.js'; export {BaseToolset, isBaseToolset} from './tools/base_toolset.js'; export type {ToolPredicate} from './tools/base_toolset.js'; +export {ExaSearchTool} from './tools/exa_search_tool.js'; +export type { + ExaCategory, + ExaContentsOptions, + ExaSearchToolArgs, + ExaSearchToolParams, + ExaSearchToolResponse, + ExaSearchToolResult, + ExaSearchType, +} from './tools/exa_search_tool.js'; export {EXIT_LOOP, ExitLoopTool} from './tools/exit_loop_tool.js'; export {FunctionTool, isFunctionTool} from './tools/function_tool.js'; export type { diff --git a/core/src/tools/exa_search_tool.ts b/core/src/tools/exa_search_tool.ts new file mode 100644 index 00000000..e28cef95 --- /dev/null +++ b/core/src/tools/exa_search_tool.ts @@ -0,0 +1,400 @@ +/** + * @license + * Copyright 2026 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import {FunctionDeclaration, Type} from '@google/genai'; +import {ContentsOptions, Exa} from 'exa-js'; + +import {getLogger} from '../utils/logger.js'; +import {BaseTool, RunAsyncToolRequest} from './base_tool.js'; + +const logger = getLogger(); + +/** + * Search types supported by the Exa /search endpoint. + * + * - `auto` (default): Exa picks between neural and keyword retrieval. + * - `fast`: Lower-latency retrieval optimized for agent loops. + * - `neural`: Embedding-based semantic search. + * - `hybrid`: Combined neural and keyword retrieval. 
+ * - `instant`: Returns cached results with minimal latency. + */ +export type ExaSearchType = 'auto' | 'fast' | 'neural' | 'hybrid' | 'instant'; + +/** + * Categories supported by the Exa /search endpoint for topical filtering. + */ +export type ExaCategory = + | 'company' + | 'research paper' + | 'news' + | 'pdf' + | 'personal site' + | 'financial report' + | 'people'; + +/** + * Contents options that control what is returned per result. + * + * Multiple fields can be set simultaneously. Defaults to + * `{highlights: true}` when no contents options are provided, which keeps + * tool responses compact while still giving the model usable snippets. + */ +export interface ExaContentsOptions { + text?: boolean; + highlights?: boolean; + summary?: boolean; +} + +/** + * Constructor parameters for {@link ExaSearchTool}. + */ +export interface ExaSearchToolParams { + /** + * Exa API key. If omitted, the value of the `EXA_API_KEY` environment + * variable is used. The tool throws at call time if neither source is + * populated. + */ + apiKey?: string; + + /** + * Default search type used when the model does not provide one. + * Defaults to `auto`. + */ + type?: ExaSearchType; + + /** + * Default number of results returned when the model does not provide one. + * Defaults to 5. Clamped to the Exa API range of 1-100. + */ + numResults?: number; + + /** + * Default category used when the model does not provide one. + */ + category?: ExaCategory; + + /** + * Default contents options used when the model does not provide them. + * Defaults to `{highlights: true}`. + */ + contents?: ExaContentsOptions; +} + +/** + * Schema describing the arguments the model can pass to the Exa search tool. + * Kept narrower than the full Exa API surface to keep tool calls predictable. 
+ */ +export interface ExaSearchToolArgs { + query: string; + type?: ExaSearchType; + numResults?: number; + category?: ExaCategory; + includeDomains?: string[]; + excludeDomains?: string[]; + includeText?: string[]; + excludeText?: string[]; + startPublishedDate?: string; + endPublishedDate?: string; + contents?: ExaContentsOptions; +} + +/** + * A single result returned by {@link ExaSearchTool.runAsync}. + */ +export interface ExaSearchToolResult { + title: string; + url: string; + id: string; + publishedDate?: string; + author?: string; + score?: number; + snippet: string; + text?: string; + highlights?: string[]; + summary?: string; +} + +/** + * Tool response shape returned by {@link ExaSearchTool.runAsync}. + */ +export interface ExaSearchToolResponse { + results: ExaSearchToolResult[]; +} + +const INTEGRATION_HEADER = 'x-exa-integration'; +const INTEGRATION_NAME = 'adk-js'; +const DEFAULT_NUM_RESULTS = 5; +const DEFAULT_TYPE: ExaSearchType = 'auto'; +const DEFAULT_CONTENTS: ExaContentsOptions = {highlights: true}; + +/** + * A tool that performs web search through the + * {@link https://exa.ai | Exa} AI search API. + * + * Unlike {@link './google_search_tool.js'.GoogleSearchTool} or + * {@link './vertex_ai_search_tool.js'.VertexAiSearchTool}, this tool runs on + * the client side: each call hits the Exa REST API and returns structured + * results that the model can read directly. It works with any LLM that + * supports function calling. + * + * @example + * ```ts + * import {LlmAgent, ExaSearchTool} from '@google/adk'; + * + * const agent = new LlmAgent({ + * name: 'researcher', + * model: 'gemini-2.5-flash', + * instruction: 'Use exa_search to find recent web sources before answering.', + * tools: [new ExaSearchTool()], + * }); + * ``` + * + * Authentication: set the `EXA_API_KEY` environment variable, or pass an + * `apiKey` to the constructor. 
/**
 * A function-calling tool that performs web search through the
 * {@link https://exa.ai | Exa} search API.
 *
 * The tool runs on the client side: every invocation calls the Exa SDK and
 * returns structured results (`{results: [...]}`) that the model reads
 * directly, so it can be used with any LLM that supports function calling.
 *
 * Authentication: pass `apiKey` to the constructor or set the `EXA_API_KEY`
 * environment variable. The key is only resolved when the first search runs.
 *
 * @example
 * ```ts
 * import {LlmAgent, ExaSearchTool} from '@google/adk';
 *
 * const agent = new LlmAgent({
 *   name: 'researcher',
 *   model: 'gemini-2.5-flash',
 *   instruction: 'Use exa_search to find recent web sources before answering.',
 *   tools: [new ExaSearchTool()],
 * });
 * ```
 */
export class ExaSearchTool extends BaseTool {
  private readonly apiKey?: string;
  private readonly defaultType: ExaSearchType;
  private readonly defaultNumResults: number;
  private readonly defaultCategory?: ExaCategory;
  private readonly defaultContents: ExaContentsOptions;
  /** Lazily created SDK client; reused across calls once built. */
  private client?: Exa;

  /**
   * @param params Optional API key and per-tool defaults. `numResults` is
   *     clamped at construction time; every other default is stored as given.
   */
  constructor(params: ExaSearchToolParams = {}) {
    super({
      name: 'exa_search',
      description:
        'Search the web with the Exa AI search API. Returns a list of ' +
        'results with title, url, and content snippets (highlights, text, ' +
        'or summary). Useful for retrieving up-to-date information from ' +
        'the open web.',
    });
    this.apiKey = params.apiKey;
    this.defaultType = params.type ?? DEFAULT_TYPE;
    this.defaultNumResults = clampNumResults(
      params.numResults ?? DEFAULT_NUM_RESULTS,
    );
    this.defaultCategory = params.category;
    this.defaultContents = params.contents ?? DEFAULT_CONTENTS;
  }

  /**
   * Builds the function declaration advertised to the model. Only `query` is
   * required; every other argument falls back to the tool's defaults.
   */
  override _getDeclaration(): FunctionDeclaration {
    return {
      name: this.name,
      description: this.description,
      parameters: {
        type: Type.OBJECT,
        properties: {
          query: {
            type: Type.STRING,
            description: 'The natural-language search query.',
          },
          type: {
            type: Type.STRING,
            description:
              'Search type. One of: auto, fast, neural, hybrid, instant. ' +
              'Defaults to the value configured on the tool.',
            enum: ['auto', 'fast', 'neural', 'hybrid', 'instant'],
          },
          numResults: {
            type: Type.INTEGER,
            description:
              'Number of results to return (1-100). Defaults to the value ' +
              'configured on the tool.',
          },
          category: {
            type: Type.STRING,
            description:
              'Restrict results to a single category, e.g. news, ' +
              'research paper, company, financial report.',
            enum: [
              'company',
              'research paper',
              'news',
              'pdf',
              'personal site',
              'financial report',
              'people',
            ],
          },
          includeDomains: {
            type: Type.ARRAY,
            description:
              'Only return results from these domains (e.g. ["nytimes.com"]).',
            items: {type: Type.STRING},
          },
          excludeDomains: {
            type: Type.ARRAY,
            description: 'Drop results from these domains.',
            items: {type: Type.STRING},
          },
          includeText: {
            type: Type.ARRAY,
            description:
              'Only return results whose page contains these strings.',
            items: {type: Type.STRING},
          },
          excludeText: {
            type: Type.ARRAY,
            description: 'Drop results whose page contains these strings.',
            items: {type: Type.STRING},
          },
          startPublishedDate: {
            type: Type.STRING,
            description:
              'Only return results published on or after this ISO 8601 date.',
          },
          endPublishedDate: {
            type: Type.STRING,
            description:
              'Only return results published on or before this ISO 8601 date.',
          },
          // `runAsync` honours a per-call `contents` argument, so it has to be
          // declared here or the model can never supply it.
          contents: {
            type: Type.OBJECT,
            description:
              'Which content fields to include with each result. Defaults ' +
              'to the value configured on the tool.',
            properties: {
              text: {
                type: Type.BOOLEAN,
                description: 'Include the page text.',
              },
              highlights: {
                type: Type.BOOLEAN,
                description: 'Include highlight snippets.',
              },
              summary: {
                type: Type.BOOLEAN,
                description: 'Include a summary.',
              },
            },
          },
        },
        required: ['query'],
      },
    };
  }

  /**
   * Runs one search and returns `{results}` with one formatted entry per hit.
   *
   * Arguments come from the model and are therefore validated at runtime:
   * malformed optional arguments are ignored in favour of the tool defaults
   * instead of being forwarded to the API.
   *
   * @throws Error if `query` is missing, not a string, or blank.
   * @throws Error if no API key is configured (raised by `getClient`).
   */
  override async runAsync({args}: RunAsyncToolRequest): Promise<unknown> {
    const typedArgs = args as unknown as ExaSearchToolArgs;
    const query: unknown = typedArgs.query;
    if (typeof query !== 'string' || query.trim() === '') {
      throw new Error('exa_search requires a non-empty `query` string.');
    }

    const client = this.getClient();

    const requestedContents: unknown = typedArgs.contents;
    const contents =
      typeof requestedContents === 'object' && requestedContents !== null
        ? (requestedContents as ExaContentsOptions)
        : this.defaultContents;

    // A non-numeric value falls back to the default configured on this tool,
    // not to the library-wide default used inside clampNumResults.
    const requestedCount: unknown = typedArgs.numResults;
    const numResults =
      typeof requestedCount === 'number' && Number.isFinite(requestedCount)
        ? clampNumResults(requestedCount)
        : this.defaultNumResults;

    const type = typedArgs.type ?? this.defaultType;
    const category = typedArgs.category ?? this.defaultCategory;

    const includeDomains = ExaSearchTool.toStringList(typedArgs.includeDomains);
    const excludeDomains = ExaSearchTool.toStringList(typedArgs.excludeDomains);
    const includeText = ExaSearchTool.toStringList(typedArgs.includeText);
    const excludeText = ExaSearchTool.toStringList(typedArgs.excludeText);
    const startPublishedDate = ExaSearchTool.toNonEmptyString(
      typedArgs.startPublishedDate,
    );
    const endPublishedDate = ExaSearchTool.toNonEmptyString(
      typedArgs.endPublishedDate,
    );

    logger.debug(
      `Running Exa search: query="${query}", type=${type}, ` +
        `numResults=${numResults}, category=${category ?? 'none'}`,
    );

    // Optional filters are spread in only when present so the request never
    // carries empty or undefined keys.
    const response = await client.search(query, {
      type,
      numResults,
      contents: toSdkContents(contents),
      ...(category ? {category} : {}),
      ...(includeDomains ? {includeDomains} : {}),
      ...(excludeDomains ? {excludeDomains} : {}),
      ...(includeText ? {includeText} : {}),
      ...(excludeText ? {excludeText} : {}),
      ...(startPublishedDate ? {startPublishedDate} : {}),
      ...(endPublishedDate ? {endPublishedDate} : {}),
    });

    return {
      results: (response.results ?? []).map(formatResult),
    };
  }

  /**
   * Returns the cached SDK client, creating it on first use.
   *
   * @throws Error if neither the constructor `apiKey` nor `EXA_API_KEY`
   *     provides a non-empty key.
   */
  private getClient(): Exa {
    if (this.client) {
      return this.client;
    }
    // `||` rather than `??`: an empty-string apiKey must not mask the
    // environment variable.
    const key = this.apiKey || process.env['EXA_API_KEY'];
    if (!key) {
      throw new Error(
        'Exa API key is not configured. Set the EXA_API_KEY environment ' +
          'variable or pass `apiKey` to ExaSearchTool.',
      );
    }
    const client = new Exa(key);
    // Tag every request so Exa can attribute usage to this integration.
    // NOTE(review): `headers` is not part of the SDK's public typings, hence
    // the structural cast and the runtime guard; confirm on SDK upgrades.
    const headers = (
      client as unknown as {
        headers?: {set?: (key: string, value: string) => void};
      }
    ).headers;
    if (headers && typeof headers.set === 'function') {
      headers.set(INTEGRATION_HEADER, INTEGRATION_NAME);
    }
    this.client = client;
    return client;
  }

  /**
   * Normalizes a model-supplied list argument: keeps non-empty strings,
   * accepts a bare string as a one-element list, and yields `undefined` when
   * nothing usable remains.
   */
  private static toStringList(value: unknown): string[] | undefined {
    const candidates: unknown[] = Array.isArray(value) ? value : [value];
    const items = candidates.filter(
      (item): item is string => typeof item === 'string' && item.length > 0,
    );
    return items.length > 0 ? items : undefined;
  }

  /** Returns `value` when it is a non-empty string, otherwise `undefined`. */
  private static toNonEmptyString(value: unknown): string | undefined {
    return typeof value === 'string' && value.length > 0 ? value : undefined;
  }
}
/**
 * Translates the tool's boolean contents flags into the options object handed
 * to the Exa SDK. Only flags that are exactly `true` are forwarded; `false`
 * and unset flags are left out instead of being sent as `false`.
 *
 * @param opts Contents flags configured on the tool or supplied per call.
 * @returns A fresh options object holding only the enabled flags.
 */
function toSdkContents(opts: ExaContentsOptions): ContentsOptions {
  const out: ContentsOptions = {};
  if (opts.text === true) out.text = true;
  if (opts.highlights === true) out.highlights = true;
  if (opts.summary === true) out.summary = true;
  return out;
}

/**
 * Coerces a requested result count into the 1-100 range.
 *
 * @param value Requested count; fractional values are rounded down.
 * @returns The clamped integer, or `DEFAULT_NUM_RESULTS` when `value` is
 *     `NaN` or infinite.
 */
function clampNumResults(value: number): number {
  if (!Number.isFinite(value)) {
    return DEFAULT_NUM_RESULTS;
  }
  return Math.min(100, Math.max(1, Math.floor(value)));
}

/**
 * Converts one raw SDK search hit into the tool's result shape.
 *
 * Every field is type-checked at runtime because the hit is treated as
 * untyped data: `title`, `url` and `id` default to `''`, optional metadata is
 * included only when it has the expected type, and `text`, `highlights` and
 * `summary` are included only when non-empty.
 *
 * @param result Raw hit as returned by the SDK.
 * @returns The normalized result, always carrying a `snippet` string.
 */
function formatResult(result: Record<string, unknown>): ExaSearchToolResult {
  const readString = (key: string): string | undefined => {
    const value = result[key];
    return typeof value === 'string' ? value : undefined;
  };

  const text = readString('text');
  const summary = readString('summary');
  const publishedDate = readString('publishedDate');
  const author = readString('author');
  const score = result['score'];

  // Non-string entries are dropped rather than stringified.
  const rawHighlights = result['highlights'];
  const highlights = Array.isArray(rawHighlights)
    ? rawHighlights.filter((h): h is string => typeof h === 'string')
    : undefined;

  return {
    title: readString('title') ?? '',
    url: readString('url') ?? '',
    id: readString('id') ?? '',
    ...(publishedDate !== undefined ? {publishedDate} : {}),
    ...(author !== undefined ? {author} : {}),
    ...(typeof score === 'number' ? {score} : {}),
    snippet: pickSnippet({highlights, summary, text}),
    ...(text ? {text} : {}),
    ...(highlights && highlights.length > 0 ? {highlights} : {}),
    ...(summary ? {summary} : {}),
  };
}

/**
 * Chooses the short preview for a result: joined highlights first, then the
 * summary, then the first 500 UTF-16 code units of the text.
 *
 * A source that is empty after trimming is skipped, so blank highlights do
 * not hide a usable summary or text.
 *
 * @param parts Candidate content fields of a single result.
 * @returns The trimmed snippet, or `''` when no source has content.
 */
function pickSnippet(parts: {
  highlights?: string[];
  summary?: string;
  text?: string;
}): string {
  const maxTextLength = 500;

  const fromHighlights = (parts.highlights ?? []).join(' ').trim();
  if (fromHighlights) {
    return fromHighlights;
  }

  const fromSummary = (parts.summary ?? '').trim();
  if (fromSummary) {
    return fromSummary;
  }

  const text = parts.text ?? '';
  let end = Math.min(text.length, maxTextLength);
  if (end > 0 && end < text.length) {
    // Do not cut between the two halves of a surrogate pair, which would
    // leave a lone high surrogate at the end of the snippet.
    const last = text.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff) {
      end -= 1;
    }
  }
  return text.slice(0, end).trim();
}
beforeEach(() => { + delete process.env['EXA_API_KEY']; + }); + + afterEach(() => { + if (ORIGINAL_API_KEY === undefined) { + delete process.env['EXA_API_KEY']; + } else { + process.env['EXA_API_KEY'] = ORIGINAL_API_KEY; + } + vi.restoreAllMocks(); + }); + + it('exposes a function declaration with the expected schema', () => { + const tool = new ExaSearchTool(); + const declaration = tool._getDeclaration(); + + expect(declaration?.name).toEqual('exa_search'); + expect(declaration?.description).toContain('Exa AI search API'); + expect(declaration?.parameters?.type).toEqual(Type.OBJECT); + expect(declaration?.parameters?.required).toEqual(['query']); + expect(declaration?.parameters?.properties?.['query']?.type).toEqual( + Type.STRING, + ); + expect(declaration?.parameters?.properties?.['type']?.enum).toEqual([ + 'auto', + 'fast', + 'neural', + 'hybrid', + 'instant', + ]); + }); + + it('throws a clear error when no API key is configured', async () => { + const tool = new ExaSearchTool(); + await expect( + tool.runAsync({ + args: {query: 'hello'}, + toolContext: {} as unknown as Context, + }), + ).rejects.toThrow(/EXA_API_KEY/); + }); + + it('passes query and configured defaults to the Exa client', async () => { + const fakeClient = makeFakeClient(() => + Promise.resolve({ + results: [ + { + id: 'a1', + url: 'https://example.com/a', + title: 'Result A', + highlights: ['snippet from A'], + }, + ], + }), + ); + const tool = new ExaSearchTool({ + apiKey: 'fake-key', + type: 'neural', + numResults: 3, + }); + injectClient(tool, fakeClient); + + const response = (await tool.runAsync({ + args: {query: 'agents'}, + toolContext: {} as unknown as Context, + })) as {results: Array>}; + + expect(fakeClient.search).toHaveBeenCalledTimes(1); + expect(fakeClient.search).toHaveBeenCalledWith('agents', { + type: 'neural', + numResults: 3, + contents: {highlights: true}, + }); + expect(response.results).toHaveLength(1); + expect(response.results[0]).toMatchObject({ + id: 'a1', + url: 
'https://example.com/a', + title: 'Result A', + snippet: 'snippet from A', + highlights: ['snippet from A'], + }); + }); + + it('lets per-call args override constructor defaults and forwards filters', async () => { + const fakeClient = makeFakeClient(() => Promise.resolve({results: []})); + const tool = new ExaSearchTool({apiKey: 'fake-key'}); + injectClient(tool, fakeClient); + + await tool.runAsync({ + args: { + query: 'fusion energy', + type: 'fast', + numResults: 7, + category: 'research paper', + includeDomains: ['arxiv.org'], + excludeDomains: ['reddit.com'], + includeText: ['tokamak'], + excludeText: ['stellarator'], + startPublishedDate: '2024-01-01', + endPublishedDate: '2025-01-01', + }, + toolContext: {} as unknown as Context, + }); + + expect(fakeClient.search).toHaveBeenCalledWith('fusion energy', { + type: 'fast', + numResults: 7, + contents: {highlights: true}, + category: 'research paper', + includeDomains: ['arxiv.org'], + excludeDomains: ['reddit.com'], + includeText: ['tokamak'], + excludeText: ['stellarator'], + startPublishedDate: '2024-01-01', + endPublishedDate: '2025-01-01', + }); + }); + + it('clamps numResults to the API range of 1-100', async () => { + const fakeClient = makeFakeClient(() => Promise.resolve({results: []})); + const tool = new ExaSearchTool({apiKey: 'fake-key'}); + injectClient(tool, fakeClient); + + await tool.runAsync({ + args: {query: 'q', numResults: 500}, + toolContext: {} as unknown as Context, + }); + await tool.runAsync({ + args: {query: 'q', numResults: 0}, + toolContext: {} as unknown as Context, + }); + + expect(fakeClient.search).toHaveBeenNthCalledWith( + 1, + 'q', + expect.objectContaining({numResults: 100}), + ); + expect(fakeClient.search).toHaveBeenNthCalledWith( + 2, + 'q', + expect.objectContaining({numResults: 1}), + ); + }); + + it('falls back from highlights to summary to text when building the snippet', async () => { + const fakeClient = makeFakeClient(() => + Promise.resolve({ + results: [ + { + id: 
'1', + url: 'https://a.test', + title: 'A', + highlights: ['hi'], + }, + { + id: '2', + url: 'https://b.test', + title: 'B', + summary: 'sum', + }, + { + id: '3', + url: 'https://c.test', + title: 'C', + text: 'long body text '.repeat(50), + }, + { + id: '4', + url: 'https://d.test', + title: 'D', + }, + ], + }), + ); + const tool = new ExaSearchTool({apiKey: 'fake-key'}); + injectClient(tool, fakeClient); + + const response = (await tool.runAsync({ + args: {query: 'test'}, + toolContext: {} as unknown as Context, + })) as {results: Array<{snippet: string}>}; + + expect(response.results[0].snippet).toEqual('hi'); + expect(response.results[1].snippet).toEqual('sum'); + expect(response.results[2].snippet.length).toBeLessThanOrEqual(500); + expect(response.results[2].snippet.length).toBeGreaterThan(0); + expect(response.results[3].snippet).toEqual(''); + }); + + it('throws if query is missing or not a string', async () => { + const tool = new ExaSearchTool({apiKey: 'fake-key'}); + injectClient( + tool, + makeFakeClient(() => Promise.resolve({results: []})), + ); + + await expect( + tool.runAsync({ + args: {} as Record, + toolContext: {} as unknown as Context, + }), + ).rejects.toThrow(/non-empty `query`/); + + await expect( + tool.runAsync({ + args: {query: 42 as unknown as string}, + toolContext: {} as unknown as Context, + }), + ).rejects.toThrow(/non-empty `query`/); + }); + + it('reads EXA_API_KEY from the environment when no apiKey is passed', () => { + process.env['EXA_API_KEY'] = 'env-key'; + const tool = new ExaSearchTool(); + // getClient is private, so it is called through a cast below. Nothing is + // stubbed: this only checks that building the client with the env key works. 
+ expect(() => + (tool as unknown as {getClient: () => unknown}).getClient(), + ).not.toThrow(); + }); +}); diff --git a/package-lock.json b/package-lock.json index 586fe9ba..a8ff6e44 100644 --- a/package-lock.json +++ b/package-lock.json @@ -61,6 +61,7 @@ "@opentelemetry/sdk-metrics": "^2.1.0", "@opentelemetry/sdk-trace-base": "^2.1.0", "@opentelemetry/sdk-trace-node": "^2.1.0", + "exa-js": "^2.12.1", "express": "^4.22.1", "google-auth-library": "^10.3.0", "js-yaml": "^4.1.1", @@ -6198,6 +6199,15 @@ "node": ">= 0.4.0" } }, + "node_modules/cross-fetch": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-4.1.0.tgz", + "integrity": "sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw==", + "license": "MIT", + "dependencies": { + "node-fetch": "^2.7.0" + } + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -7251,6 +7261,61 @@ "node": ">=18.0.0" } }, + "node_modules/exa-js": { + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/exa-js/-/exa-js-2.12.1.tgz", + "integrity": "sha512-ydGF1dw2V/pyksFbxaMG6pkP3/QED2WOsbjJvopEz2d0VaS5uBUXsx88uXLBkdYCX2/FuEmLQA7h4ZT46gnF7A==", + "license": "MIT", + "dependencies": { + "cross-fetch": "~4.1.0", + "dotenv": "~16.4.7", + "openai": "^5.0.1", + "zod": "^3.22.0", + "zod-to-json-schema": "^3.20.0" + } + }, + "node_modules/exa-js/node_modules/dotenv": { + "version": "16.4.7", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz", + "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/exa-js/node_modules/openai": { + "version": "5.23.2", + "resolved": "https://registry.npmjs.org/openai/-/openai-5.23.2.tgz", + "integrity": 
"sha512-MQBzmTulj+MM5O8SKEk/gL8a7s5mktS9zUtAkU257WjvobGc9nKcBuVwjyEEcb9SI8a8Y2G/mzn3vm9n1Jlleg==", + "license": "Apache-2.0", + "bin": { + "openai": "bin/cli" + }, + "peerDependencies": { + "ws": "^8.18.0", + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "ws": { + "optional": true + }, + "zod": { + "optional": true + } + } + }, + "node_modules/exa-js/node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/execa": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz",