From 27a151841e9cb629e7661d8a242e21c05f201cdd Mon Sep 17 00:00:00 2001 From: Matthew Podwysocki Date: Thu, 12 Feb 2026 17:47:54 -0500 Subject: [PATCH 1/3] feat: Add contextual documentation retrieval tool Implements get_contextual_docs_tool for intelligent documentation retrieval based on user context, code snippets, and error messages. Features: - Context-aware keyword extraction from text, code, and errors - Intelligent relevance scoring with match explanations - Troubleshooting tips for error messages - Technology-specific filtering (mapbox-gl-js, iOS SDK, Android SDK) - Suggested related topics - Ranked results with excerpts and direct documentation links - 1-hour caching for performance Smarter than simple search - understands full context and provides actionable, targeted documentation guidance. Addresses: #70 Changes: - Add GetContextualDocsTool implementation - Register in toolRegistry.ts - Add comprehensive test coverage (13 tests) - Update CHANGELOG.md with feature details - Update README.md with tool documentation and examples - All 549 tests passing Co-Authored-By: Claude Sonnet 4.5 --- CHANGELOG.md | 12 + README.md | 19 + .../GetContextualDocsTool.input.schema.ts | 56 ++ .../GetContextualDocsTool.output.schema.ts | 57 ++ .../GetContextualDocsTool.ts | 622 ++++++++++++++++++ src/tools/toolRegistry.ts | 2 + .../tool-naming-convention.test.ts.snap | 5 + .../GetContextualDocsTool.test.ts | 224 +++++++ 8 files changed, 997 insertions(+) create mode 100644 src/tools/get-contextual-docs-tool/GetContextualDocsTool.input.schema.ts create mode 100644 src/tools/get-contextual-docs-tool/GetContextualDocsTool.output.schema.ts create mode 100644 src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts create mode 100644 test/tools/get-contextual-docs-tool/GetContextualDocsTool.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d8d818..1dd339e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ ## Unreleased +### Features Added + +- **Contextual 
Documentation Tool**: New `get_contextual_docs_tool` provides intelligent documentation retrieval based on context (#70) + - Analyzes what you're working on, code snippets, and error messages + - Extracts keywords automatically from context, code, and errors + - Returns ranked, relevant documentation with explanations + - Provides troubleshooting tips for error messages + - Suggests related topics to explore + - Smarter than simple search - understands full context + - Technology-aware ranking: results that mention the specified SDK (e.g. mapbox-gl-js, iOS SDK, Android SDK) score higher + - 1-hour caching for performance + ### Documentation - **PR Guidelines**: Added CHANGELOG requirement to CLAUDE.md (#67) diff --git a/README.md b/README.md index 85143d6..9bc2678 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,25 @@ The `MAPBOX_ACCESS_TOKEN` environment variable is required. **Each tool requires 📖 **[See more examples and interactive demo →](./docs/mapbox-docs-tool-demo.md)** +**get_contextual_docs_tool** - Retrieve relevant Mapbox documentation based on your current context. This smart tool analyzes what you're working on, code snippets, and error messages to provide targeted, actionable documentation. + +**Features:** + +- Context-aware keyword extraction from descriptions, code, and errors +- Intelligent relevance scoring with match explanations +- Troubleshooting tips for error messages +- Technology-aware ranking: results that mention the specified SDK (e.g. mapbox-gl-js, iOS SDK, Android SDK) score higher +- Suggested related topics to explore +- Ranked results with excerpts and direct links + +**Example prompts:** + +- "I'm trying to add custom markers with popups, here's my code: [snippet]" +- "Getting this error: 'Style is not done loading' - what does it mean?" +- "Working with mapbox-gl-js to show user location on a map" +- "How do I handle rate limiting errors in the geocoding API?" +- "Building a store locator with search functionality" + ### Reference Tools **get_reference_tool** - Access static Mapbox reference documentation and schemas. 
This tool provides essential reference information that helps AI assistants understand Mapbox concepts and build correct styles and tokens. diff --git a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.input.schema.ts b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.input.schema.ts new file mode 100644 index 0000000..8c3d038 --- /dev/null +++ b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.input.schema.ts @@ -0,0 +1,56 @@ +import { z } from 'zod'; + +/** + * Input schema for GetContextualDocsTool + * + * This tool retrieves relevant Mapbox documentation based on the user's + * current context, including what they're working on, code snippets, + * and error messages. + */ +export const GetContextualDocsInputSchema = z.object({ + context: z + .string() + .min(1) + .describe( + 'Description of what the user is working on or trying to accomplish (e.g., "adding custom markers with popups")' + ), + + codeSnippet: z + .string() + .optional() + .describe( + 'Optional code snippet being worked with. Helps identify the specific APIs and patterns being used.' + ), + + errorMessage: z + .string() + .optional() + .describe( + 'Optional error message to help diagnose issues and find relevant troubleshooting documentation.' 
+ ), + + technology: z + .string() + .optional() + .describe( + 'Specific SDK or platform being used (e.g., "mapbox-gl-js", "ios-sdk", "android-sdk")' + ), + + limit: z + .number() + .int() + .min(1) + .max(10) + .optional() + .default(5) + .describe( + 'Maximum number of documentation results to return (1-10, default: 5)' + ) +}); + +/** + * Inferred TypeScript type for GetContextualDocsTool input + */ +export type GetContextualDocsInput = z.infer< + typeof GetContextualDocsInputSchema +>; diff --git a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.output.schema.ts b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.output.schema.ts new file mode 100644 index 0000000..e89d849 --- /dev/null +++ b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.output.schema.ts @@ -0,0 +1,57 @@ +import { z } from 'zod'; + +/** + * Schema for a single documentation result + */ +const DocResultSchema = z.object({ + title: z.string().describe('Title of the documentation section'), + excerpt: z + .string() + .describe( + 'Relevant excerpt from the documentation showing key information' + ), + category: z + .string() + .describe('Category of documentation (apis, sdks, guides, examples)'), + url: z.string().describe('Full URL to the documentation page'), + relevanceScore: z + .number() + .describe( + 'Relevance score from 0-1 indicating how well this matches the context' + ), + matchReason: z + .string() + .optional() + .describe( + 'Explanation of why this documentation is relevant to the context' + ) +}); + +/** + * Output schema for GetContextualDocsTool + */ +export const GetContextualDocsOutputSchema = z.object({ + results: z + .array(DocResultSchema) + .describe('Ranked list of relevant documentation sections'), + extractedKeywords: z + .array(z.string()) + .describe('Key concepts extracted from the provided context'), + suggestedTopics: z + .array(z.string()) + .optional() + .describe('Related topics the user might want to explore'), + troubleshootingTips: 
z + .array(z.string()) + .optional() + .describe('Troubleshooting suggestions if an error message was provided'), + totalResults: z.number().describe('Total number of results found'), + context: z.string().describe('The original context provided') +}); + +/** + * Inferred TypeScript type for GetContextualDocsTool output + */ +export type GetContextualDocsOutput = z.infer< + typeof GetContextualDocsOutputSchema +>; diff --git a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts new file mode 100644 index 0000000..59755b3 --- /dev/null +++ b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts @@ -0,0 +1,622 @@ +import { z } from 'zod'; +import { BaseTool } from '../BaseTool.js'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js'; +import type { HttpRequest } from '../../utils/types.js'; +import { GetContextualDocsInputSchema } from './GetContextualDocsTool.input.schema.js'; +import { + GetContextualDocsOutputSchema, + type GetContextualDocsOutput +} from './GetContextualDocsTool.output.schema.js'; + +/** + * GetContextualDocsTool - Retrieve relevant Mapbox documentation based on context + * + * This tool intelligently retrieves documentation by analyzing the user's current + * context, including what they're working on, code snippets, and error messages. + * It goes beyond simple keyword search by understanding the full context and + * providing targeted, actionable documentation. 
+ * + * Features: + * - Context-aware keyword extraction + * - Code pattern recognition + * - Error message analysis + * - Technology-specific filtering + * - Relevance scoring with explanations + * - Suggested related topics + * + * @example + * ```typescript + * const tool = new GetContextualDocsTool({ httpRequest }); + * const result = await tool.run({ + * context: "adding custom markers with popups", + * codeSnippet: "map.addLayer({type: 'symbol', ...})", + * technology: "mapbox-gl-js" + * }); + * ``` + */ +export class GetContextualDocsTool extends BaseTool< + typeof GetContextualDocsInputSchema, + typeof GetContextualDocsOutputSchema +> { + readonly name = 'get_contextual_docs_tool'; + readonly description = + "Retrieve relevant Mapbox documentation based on current context. Analyzes what you're working on, code snippets, and error messages to provide targeted documentation. Smarter than simple search - understands context and provides actionable guidance."; + readonly annotations = { + title: 'Get Contextual Documentation', + readOnlyHint: true, + destructiveHint: false, + idempotentHint: true, + openWorldHint: true + }; + + private readonly httpRequest: HttpRequest; + private documentationCache: { + content: string; + timestamp: number; + } | null = null; + private readonly CACHE_TTL = 60 * 60 * 1000; // 1 hour + + constructor(deps: { httpRequest: HttpRequest }) { + super({ + inputSchema: GetContextualDocsInputSchema, + outputSchema: GetContextualDocsOutputSchema + }); + this.httpRequest = deps.httpRequest; + } + + protected async execute( + input: z.infer + ): Promise { + try { + // Extract keywords from all provided context + const extractedKeywords = this.extractKeywords(input); + + // Fetch documentation + const docs = await this.fetchDocumentation(); + + // Parse and score documentation sections + const results = this.findRelevantDocs(docs, input, extractedKeywords); + + // Generate suggestions + const suggestedTopics = this.generateSuggestions( + 
extractedKeywords, + results + ); + const troubleshootingTips = input.errorMessage + ? this.generateTroubleshootingTips(input.errorMessage, results) + : undefined; + + // Limit results + const limitedResults = results.slice(0, input.limit); + + const output: GetContextualDocsOutput = { + results: limitedResults, + extractedKeywords, + suggestedTopics, + troubleshootingTips, + totalResults: results.length, + context: input.context + }; + + // Format text output + const text = this.formatOutput(output); + + return { + content: [{ type: 'text', text }], + structuredContent: output, + isError: false + }; + } catch (error) { + const errorMessage = + error instanceof Error ? error.message : String(error); + this.log('error', `${this.name}: ${errorMessage}`); + + return { + content: [ + { + type: 'text', + text: `Error retrieving contextual documentation: ${errorMessage}` + } + ], + isError: true + }; + } + } + + /** + * Extract keywords from context, code, and errors + */ + private extractKeywords( + input: z.infer + ): string[] { + const keywords = new Set(); + + // Extract from context + const contextWords = + input.context.toLowerCase().match(/\b[a-z]{3,}\b/g) || []; + contextWords.forEach((word) => { + if (!this.isStopWord(word)) { + keywords.add(word); + } + }); + + // Extract from code snippet + if (input.codeSnippet) { + const codeKeywords = this.extractCodeKeywords(input.codeSnippet); + codeKeywords.forEach((kw) => keywords.add(kw)); + } + + // Extract from error message + if (input.errorMessage) { + const errorKeywords = this.extractErrorKeywords(input.errorMessage); + errorKeywords.forEach((kw) => keywords.add(kw)); + } + + // Add technology if specified + if (input.technology) { + keywords.add(input.technology.toLowerCase()); + } + + return Array.from(keywords); + } + + /** + * Extract keywords from code snippets + */ + private extractCodeKeywords(code: string): string[] { + const keywords: string[] = []; + + // API/method patterns + const apiPattern = + 
/\b(map|layer|source|marker|popup|style|feature|coordinates?)\b/gi; + const matches = code.match(apiPattern); + if (matches) { + matches.forEach((m) => keywords.push(m.toLowerCase())); + } + + // Method calls + const methodPattern = /\.(add|remove|set|get|load|update|create)(\w+)/g; + let match; + while ((match = methodPattern.exec(code)) !== null) { + keywords.push(match[1].toLowerCase()); + if (match[2]) { + keywords.push(match[2].toLowerCase()); + } + } + + return keywords; + } + + /** + * Extract keywords from error messages + */ + private extractErrorKeywords(error: string): string[] { + const keywords: string[] = []; + + // Common error terms + const errorTerms = [ + 'token', + 'authentication', + 'permission', + 'rate limit', + 'timeout', + 'network', + 'style', + 'layer', + 'source' + ]; + + errorTerms.forEach((term) => { + if (error.toLowerCase().includes(term)) { + keywords.push(term); + } + }); + + return keywords; + } + + /** + * Check if a word is a stop word + */ + private isStopWord(word: string): boolean { + const stopWords = new Set([ + 'the', + 'and', + 'for', + 'with', + 'this', + 'that', + 'from', + 'have', + 'has', + 'can', + 'will', + 'what', + 'how', + 'when', + 'where', + 'why' + ]); + return stopWords.has(word); + } + + /** + * Fetch Mapbox documentation + */ + private async fetchDocumentation(): Promise { + // Check cache + if ( + this.documentationCache && + Date.now() - this.documentationCache.timestamp < this.CACHE_TTL + ) { + return this.documentationCache.content; + } + + // Fetch fresh documentation + const response = await this.httpRequest( + 'https://docs.mapbox.com/llms.txt', + { + method: 'GET' + } + ); + + if (!response.ok) { + throw new Error( + `Failed to fetch documentation: ${response.status} ${response.statusText}` + ); + } + + const content = await response.text(); + + // Update cache + this.documentationCache = { + content, + timestamp: Date.now() + }; + + return content; + } + + /** + * Find relevant documentation 
sections + */ + private findRelevantDocs( + docs: string, + input: z.infer, + keywords: string[] + ): Array<{ + title: string; + excerpt: string; + category: string; + url: string; + relevanceScore: number; + matchReason?: string; + }> { + const sections = this.parseSections(docs); + const scoredSections = sections + .map((section) => { + const score = this.calculateRelevance(section, keywords, input); + const reason = this.explainMatch(section, keywords, input); + return { + ...section, + relevanceScore: score, + matchReason: reason + }; + }) + .filter((section) => section.relevanceScore > 0.1) + .sort((a, b) => b.relevanceScore - a.relevanceScore); + + return scoredSections; + } + + /** + * Parse documentation into sections + */ + private parseSections(docs: string): Array<{ + title: string; + content: string; + category: string; + url: string; + excerpt: string; + }> { + const sections: Array<{ + title: string; + content: string; + category: string; + url: string; + excerpt: string; + }> = []; + + const lines = docs.split('\n'); + let currentSection: { + title: string; + content: string; + url: string; + } | null = null; + + for (const line of lines) { + // Section headers (# Title) + if (line.startsWith('# ') && !line.startsWith('## ')) { + if (currentSection) { + sections.push(this.finalizeSection(currentSection)); + } + currentSection = { + title: line.replace(/^#\s+/, '').trim(), + content: '', + url: '' + }; + } else if (currentSection) { + // URL detection + if (line.includes('http')) { + const urlMatch = line.match(/https?:\/\/[^\s]+/); + if (urlMatch && !currentSection.url) { + currentSection.url = urlMatch[0]; + } + } + currentSection.content += line + '\n'; + } + } + + if (currentSection) { + sections.push(this.finalizeSection(currentSection)); + } + + return sections; + } + + /** + * Finalize a documentation section + */ + private finalizeSection(section: { + title: string; + content: string; + url: string; + }): { + title: string; + content: string; 
+ category: string; + url: string; + excerpt: string; + } { + return { + title: section.title, + content: section.content, + category: this.categorizeSection(section.title, section.content), + url: + section.url || + `https://docs.mapbox.com/search/?query=${encodeURIComponent(section.title)}`, + excerpt: this.extractExcerpt(section.content) + }; + } + + /** + * Categorize documentation section + */ + private categorizeSection(title: string, content: string): string { + const titleLower = title.toLowerCase(); + const contentLower = content.toLowerCase(); + + if ( + titleLower.includes('api') || + contentLower.includes('endpoint') || + contentLower.includes('request') + ) { + return 'apis'; + } + if (titleLower.includes('sdk') || titleLower.includes('library')) { + return 'sdks'; + } + if (titleLower.includes('example') || contentLower.includes('demo')) { + return 'examples'; + } + return 'guides'; + } + + /** + * Extract a relevant excerpt from content + */ + private extractExcerpt(content: string, maxLength: number = 200): string { + const cleaned = content.replace(/\n\n+/g, ' ').replace(/\s+/g, ' ').trim(); + if (cleaned.length <= maxLength) { + return cleaned; + } + return cleaned.substring(0, maxLength) + '...'; + } + + /** + * Calculate relevance score for a section + */ + private calculateRelevance( + section: { title: string; content: string; category: string }, + keywords: string[], + input: z.infer + ): number { + let score = 0; + const titleLower = section.title.toLowerCase(); + const contentLower = section.content.toLowerCase(); + + // Keyword matches in title (highest weight) + keywords.forEach((keyword) => { + if (titleLower.includes(keyword)) { + score += 0.3; + } + }); + + // Keyword matches in content + keywords.forEach((keyword) => { + const regex = new RegExp(`\\b${keyword}\\b`, 'gi'); + const matches = contentLower.match(regex); + if (matches) { + score += Math.min(matches.length * 0.05, 0.2); + } + }); + + // Technology match + if 
(input.technology) { + if (contentLower.includes(input.technology.toLowerCase())) { + score += 0.15; + } + } + + // Error message match + if (input.errorMessage) { + if ( + contentLower.includes('error') || + contentLower.includes('troubleshoot') + ) { + score += 0.1; + } + } + + return Math.min(score, 1.0); + } + + /** + * Explain why a section matches + */ + private explainMatch( + section: { title: string; content: string }, + keywords: string[], + input: z.infer + ): string { + const reasons: string[] = []; + const titleLower = section.title.toLowerCase(); + const contentLower = section.content.toLowerCase(); + + // Check for keyword matches + const matchedKeywords = keywords.filter( + (kw) => titleLower.includes(kw) || contentLower.includes(kw) + ); + + if (matchedKeywords.length > 0) { + reasons.push( + `Matches key concepts: ${matchedKeywords.slice(0, 3).join(', ')}` + ); + } + + if ( + input.technology && + contentLower.includes(input.technology.toLowerCase()) + ) { + reasons.push(`Relevant to ${input.technology}`); + } + + if (input.errorMessage && contentLower.includes('troubleshoot')) { + reasons.push('Contains troubleshooting information'); + } + + return reasons.length > 0 ? 
reasons.join('; ') : 'Related to your query'; + } + + /** + * Generate suggested topics + */ + private generateSuggestions( + keywords: string[], + _results: Array<{ title: string }> + ): string[] { + const suggestions = new Set(); + + // Common related topics based on keywords + const relatedTopics: Record = { + marker: ['Popups', 'Custom Icons', 'Clustering'], + layer: ['Styling', 'Data Sources', 'Expressions'], + style: ['Layers', 'Sprites', 'Fonts'], + map: ['Events', 'Controls', 'Camera'], + geocoding: ['Search', 'Rate Limits', 'Caching'] + }; + + keywords.forEach((keyword) => { + const related = relatedTopics[keyword.toLowerCase()]; + if (related) { + related.forEach((topic) => suggestions.add(topic)); + } + }); + + return Array.from(suggestions).slice(0, 5); + } + + /** + * Generate troubleshooting tips + */ + private generateTroubleshootingTips( + errorMessage: string, + _results: Array<{ title: string }> + ): string[] { + const tips: string[] = []; + const errorLower = errorMessage.toLowerCase(); + + if (errorLower.includes('token') || errorLower.includes('401')) { + tips.push( + 'Check that your access token is valid and has the required scopes' + ); + } + + if (errorLower.includes('rate limit') || errorLower.includes('429')) { + tips.push( + 'You may have exceeded API rate limits - implement caching or request throttling' + ); + } + + if (errorLower.includes('network') || errorLower.includes('timeout')) { + tips.push('Check your network connection and API endpoint URL'); + } + + if (errorLower.includes('style') || errorLower.includes('layer')) { + tips.push( + 'Verify your style JSON is valid and all referenced sources exist' + ); + } + + if (tips.length === 0) { + tips.push( + 'Review the error message and check the relevant documentation sections below' + ); + } + + return tips; + } + + /** + * Format output as markdown text + */ + private formatOutput(output: GetContextualDocsOutput): string { + let text = '# Contextual Documentation\n\n'; + text += 
`**Context:** ${output.context}\n\n`; + + if (output.extractedKeywords.length > 0) { + text += `**Key Concepts:** ${output.extractedKeywords.slice(0, 8).join(', ')}\n\n`; + } + + if (output.troubleshootingTips && output.troubleshootingTips.length > 0) { + text += '## 🔧 Troubleshooting Tips\n\n'; + output.troubleshootingTips.forEach((tip) => { + text += `- ${tip}\n`; + }); + text += '\n'; + } + + text += `## 📚 Relevant Documentation (${output.results.length} results)\n\n`; + + output.results.forEach((result, index) => { + text += `### ${index + 1}. ${result.title}\n\n`; + text += `**Category:** ${result.category} | **Relevance:** ${(result.relevanceScore * 100).toFixed(0)}%\n\n`; + + if (result.matchReason) { + text += `**Why relevant:** ${result.matchReason}\n\n`; + } + + text += `${result.excerpt}\n\n`; + text += `🔗 [Read more](${result.url})\n\n`; + text += '---\n\n'; + }); + + if (output.suggestedTopics && output.suggestedTopics.length > 0) { + text += '## 💡 Related Topics\n\n'; + text += output.suggestedTopics.map((topic) => `- ${topic}`).join('\n'); + text += '\n'; + } + + return text; + } +} diff --git a/src/tools/toolRegistry.ts b/src/tools/toolRegistry.ts index 481ea7e..07fc738 100644 --- a/src/tools/toolRegistry.ts +++ b/src/tools/toolRegistry.ts @@ -9,6 +9,7 @@ import { CoordinateConversionTool } from './coordinate-conversion-tool/Coordinat import { CreateStyleTool } from './create-style-tool/CreateStyleTool.js'; import { CreateTokenTool } from './create-token-tool/CreateTokenTool.js'; import { DeleteStyleTool } from './delete-style-tool/DeleteStyleTool.js'; +import { GetContextualDocsTool } from './get-contextual-docs-tool/GetContextualDocsTool.js'; import { GetFeedbackTool } from './get-feedback-tool/GetFeedbackTool.js'; import { ListFeedbackTool } from './list-feedback-tool/ListFeedbackTool.js'; import { GeojsonPreviewTool } from './geojson-preview-tool/GeojsonPreviewTool.js'; @@ -50,6 +51,7 @@ export const CORE_TOOLS = [ new BoundingBoxTool(), new 
CountryBoundingBoxTool(), new CoordinateConversionTool(), + new GetContextualDocsTool({ httpRequest }), new GetFeedbackTool({ httpRequest }), new ListFeedbackTool({ httpRequest }), new TilequeryTool({ httpRequest }), diff --git a/test/tools/__snapshots__/tool-naming-convention.test.ts.snap b/test/tools/__snapshots__/tool-naming-convention.test.ts.snap index 5b517de..9fe3096 100644 --- a/test/tools/__snapshots__/tool-naming-convention.test.ts.snap +++ b/test/tools/__snapshots__/tool-naming-convention.test.ts.snap @@ -47,6 +47,11 @@ exports[`Tool Naming Convention > should maintain consistent tool list (snapshot "description": "Generate a geojson.io URL to visualize GeoJSON data. Returns only the URL link.", "toolName": "geojson_preview_tool", }, + { + "className": "GetContextualDocsTool", + "description": "Retrieve relevant Mapbox documentation based on current context. Analyzes what you're working on, code snippets, and error messages to provide targeted documentation. Smarter than simple search - understands context and provides actionable guidance.", + "toolName": "get_contextual_docs_tool", + }, { "className": "GetFeedbackTool", "description": "Get a single user feedback item from the Mapbox Feedback API by its unique ID. Use this tool to retrieve detailed information about a specific user-reported issue, suggestion, or feedback about map data, routing, or POI details. 
Requires user-feedback:read scope on the access token.", diff --git a/test/tools/get-contextual-docs-tool/GetContextualDocsTool.test.ts b/test/tools/get-contextual-docs-tool/GetContextualDocsTool.test.ts new file mode 100644 index 0000000..5f6f9c8 --- /dev/null +++ b/test/tools/get-contextual-docs-tool/GetContextualDocsTool.test.ts @@ -0,0 +1,224 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { GetContextualDocsTool } from '../../../src/tools/get-contextual-docs-tool/GetContextualDocsTool.js'; +import type { HttpRequest } from '../../../src/utils/types.js'; + +describe('GetContextualDocsTool', () => { + let httpRequest: HttpRequest; + let tool: GetContextualDocsTool; + + const mockDocumentation = ` +# Mapbox GL JS + +Mapbox GL JS is a JavaScript library for interactive, customizable vector maps on the web. + +https://docs.mapbox.com/mapbox-gl-js/ + +## Markers and Popups + +Learn how to add markers and popups to your map. + +Markers are used to indicate specific locations on a map. Popups provide additional information when markers are clicked. + +https://docs.mapbox.com/mapbox-gl-js/example/add-a-marker/ + +## Handling Errors + +Common errors and how to fix them. + +Style is not done loading: This error occurs when you try to add layers before the style has finished loading. Use the 'load' event to ensure the style is ready. + +https://docs.mapbox.com/help/troubleshooting/ + +## Rate Limits + +API rate limits and best practices. + +The Geocoding API has a rate limit of 600 requests per minute. Implement caching and throttling to avoid hitting limits. 
+ +https://docs.mapbox.com/api/search/geocoding/#rate-limits +`; + + beforeEach(() => { + httpRequest = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + statusText: 'OK', + text: async () => mockDocumentation + }); + + tool = new GetContextualDocsTool({ httpRequest }); + }); + + describe('Basic functionality', () => { + it('should retrieve relevant documentation based on context', async () => { + const result = await tool.run({ + context: 'adding custom markers with popups to a map' + }); + + expect(result.isError).toBe(false); + expect(result.structuredContent).toBeDefined(); + + const output = result.structuredContent as any; + expect(output.results).toBeDefined(); + expect(output.results.length).toBeGreaterThan(0); + expect(output.extractedKeywords).toContain('markers'); + expect(output.extractedKeywords).toContain('popups'); + }); + + it('should extract keywords from code snippets', async () => { + const result = await tool.run({ + context: 'working with map layers', + codeSnippet: 'map.addLayer({type: "symbol", ...})' + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + expect(output.extractedKeywords).toContain('map'); + expect(output.extractedKeywords).toContain('layer'); + }); + + it('should provide troubleshooting tips for error messages', async () => { + const result = await tool.run({ + context: 'getting an error when adding layers', + errorMessage: 'Style is not done loading' + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + expect(output.troubleshootingTips).toBeDefined(); + expect(output.troubleshootingTips.length).toBeGreaterThan(0); + }); + + it('should filter by technology when specified', async () => { + const result = await tool.run({ + context: 'building a web map', + technology: 'mapbox-gl-js' + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + 
expect(output.extractedKeywords).toContain('mapbox-gl-js'); + }); + + it('should respect the limit parameter', async () => { + const result = await tool.run({ + context: 'mapbox maps', + limit: 2 + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + expect(output.results.length).toBeLessThanOrEqual(2); + }); + }); + + describe('Relevance scoring', () => { + it('should rank results by relevance', async () => { + const result = await tool.run({ + context: 'rate limiting in geocoding API' + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + + // Results should be sorted by relevance score + for (let i = 0; i < output.results.length - 1; i++) { + expect(output.results[i].relevanceScore).toBeGreaterThanOrEqual( + output.results[i + 1].relevanceScore + ); + } + }); + + it('should provide match reasons for results', async () => { + const result = await tool.run({ + context: 'adding markers to a map' + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + + // At least one result should have a match reason + expect(output.results.length).toBeGreaterThan(0); + const resultWithReason = output.results.find((r: any) => r.matchReason); + expect(resultWithReason).toBeDefined(); + }); + }); + + describe('Suggestions', () => { + it('should suggest related topics', async () => { + const result = await tool.run({ + context: 'working with map markers' + }); + + expect(result.isError).toBe(false); + const output = result.structuredContent as any; + expect(output.suggestedTopics).toBeDefined(); + expect(output.suggestedTopics.length).toBeGreaterThan(0); + }); + }); + + describe('Error handling', () => { + it('should handle HTTP errors gracefully', async () => { + httpRequest = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + statusText: 'Internal Server Error' + }); + + tool = new GetContextualDocsTool({ httpRequest }); + + const result = await 
tool.run({ + context: 'test context' + }); + + expect(result.isError).toBe(true); + expect(result.content[0].type).toBe('text'); + }); + + it('should handle network errors', async () => { + httpRequest = vi.fn().mockRejectedValue(new Error('Network error')); + + tool = new GetContextualDocsTool({ httpRequest }); + + const result = await tool.run({ + context: 'test context' + }); + + expect(result.isError).toBe(true); + }); + }); + + describe('Caching', () => { + it('should cache documentation for subsequent requests', async () => { + await tool.run({ context: 'first request' }); + await tool.run({ context: 'second request' }); + + // HTTP request should only be called once due to caching + expect(httpRequest).toHaveBeenCalledTimes(1); + }); + }); + + describe('Output formatting', () => { + it('should return both text and structured content', async () => { + const result = await tool.run({ + context: 'adding markers' + }); + + expect(result.content).toBeDefined(); + expect(result.content[0].type).toBe('text'); + expect(result.structuredContent).toBeDefined(); + }); + + it('should format text output as markdown', async () => { + const result = await tool.run({ + context: 'adding markers', + errorMessage: 'test error' + }); + + const text = result.content[0].text as string; + expect(text).toContain('#'); + expect(text).toContain('**'); + expect(text).toContain('Troubleshooting Tips'); + }); + }); +}); From 04d3306d72b08fe8e336d3772f412123e8066c46 Mon Sep 17 00:00:00 2001 From: Matthew Podwysocki Date: Fri, 13 Feb 2026 00:31:12 -0500 Subject: [PATCH 2/3] Enhance contextual docs tool with curated pages and multi-stage crawling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improvements: - Add curated high-value documentation pages for common topics (markers, popups, layers, etc.) 
- Implement three-stage crawling: index → main pages → linked pages - Add HTML parsing and content extraction from docs pages - Implement singular/plural keyword matching for better curated page discovery - Add linkedom dependency for HTML parsing This allows the tool to return specific, actionable documentation for queries like "adding markers with popups" instead of generic plugin pages. Claude Desktop no longer needs to supplement with web search for common Mapbox questions. All 549 tests passing. Co-Authored-By: Claude Sonnet 4.5 --- package-lock.json | 187 +++++++ package.json | 5 +- .../GetContextualDocsTool.ts | 489 +++++++++++++++++- 3 files changed, 672 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index c64ab2a..413cf89 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,6 +20,7 @@ "@opentelemetry/sdk-node": "^0.56.0", "@opentelemetry/sdk-trace-base": "^1.30.1", "@opentelemetry/semantic-conventions": "^1.30.1", + "linkedom": "^0.18.12", "zod": "^3.25.42" }, "bin": { @@ -6952,6 +6953,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/brace-expansion": { "version": "2.0.2", "dev": true, @@ -7794,6 +7801,40 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } 
+ }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/cssom": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.5.0.tgz", + "integrity": "sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==", + "license": "MIT" + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -7913,6 +7954,61 @@ "node": ">=8" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + 
}, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dot-case": { "version": "3.0.4", "dev": true, @@ -7977,6 +8073,18 @@ "node": ">=10.13.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/env-paths": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-3.0.0.tgz", @@ -9300,6 +9408,37 @@ "dev": true, "license": "MIT" }, + "node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": 
"sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -10065,6 +10204,36 @@ "url": "https://github.com/sponsors/antonk52" } }, + "node_modules/linkedom": { + "version": "0.18.12", + "resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.18.12.tgz", + "integrity": "sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==", + "license": "ISC", + "dependencies": { + "css-select": "^5.1.0", + "cssom": "^0.5.0", + "html-escaper": "^3.0.3", + "htmlparser2": "^10.0.0", + "uhyphen": "^0.2.0" + }, + "engines": { + "node": ">=16" + }, + "peerDependencies": { + "canvas": ">= 2" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/linkedom/node_modules/html-escaper": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-3.0.3.tgz", + "integrity": "sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==", + "license": "MIT" + }, "node_modules/lint-staged": { "version": "16.1.2", "dev": true, @@ -10728,6 +10897,18 @@ "dev": true, "license": "MIT" }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/object-assign": { "version": "4.1.1", "license": "MIT", @@ -12810,6 +12991,12 @@ "node": ">=0.8.0" } }, + "node_modules/uhyphen": { + "version": 
"0.2.0", + "resolved": "https://registry.npmjs.org/uhyphen/-/uhyphen-0.2.0.tgz", + "integrity": "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==", + "license": "ISC" + }, "node_modules/unc-path-regex": { "version": "0.1.2", "dev": true, diff --git a/package.json b/package.json index 24a69dd..b9cca3b 100644 --- a/package.json +++ b/package.json @@ -59,6 +59,7 @@ "@opentelemetry/sdk-node": "^0.56.0", "@opentelemetry/sdk-trace-base": "^1.30.1", "@opentelemetry/semantic-conventions": "^1.30.1", + "linkedom": "^0.18.12", "zod": "^3.25.42" }, "devDependencies": { @@ -77,13 +78,13 @@ "globals": "^16.3.0", "husky": "^9.0.0", "lint-staged": "^16.1.0", + "patch-package": "^8.0.1", "plop": "^4.0.1", "prettier": "^3.0.0", "tshy": "^3.0.2", "typescript": "^5.8.3", "typescript-eslint": "^8.42.0", - "vitest": "^3.2.4", - "patch-package": "^8.0.1" + "vitest": "^3.2.4" }, "prettier": { "singleQuote": true, diff --git a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts index 59755b3..2fb0798 100644 --- a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts +++ b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts @@ -7,6 +7,7 @@ import { GetContextualDocsOutputSchema, type GetContextualDocsOutput } from './GetContextualDocsTool.output.schema.js'; +import { parseHTML } from 'linkedom'; /** * GetContextualDocsTool - Retrieve relevant Mapbox documentation based on context @@ -54,7 +55,108 @@ export class GetContextualDocsTool extends BaseTool< content: string; timestamp: number; } | null = null; + private htmlPagesCache: Map = + new Map(); private readonly CACHE_TTL = 60 * 60 * 1000; // 1 hour + private readonly MAX_PAGES_TO_FETCH = 2; // Fetch top 2 pages from index + private readonly MAX_LINKED_PAGES = 3; // Fetch top 3 linked pages from those + + /** + * Curated high-value documentation pages organized by topic + * These are frequently needed pages 
that may not be easily discoverable through crawling + */ + private readonly CURATED_PAGES: Record = { + // Markers and Popups + marker: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/add-a-marker/', + 'https://docs.mapbox.com/mapbox-gl-js/example/custom-marker-icons/', + 'https://docs.mapbox.com/mapbox-gl-js/api/markers/#marker' + ], + popup: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/popup/', + 'https://docs.mapbox.com/mapbox-gl-js/example/popup-on-click/', + 'https://docs.mapbox.com/mapbox-gl-js/example/popup-on-hover/', + 'https://docs.mapbox.com/mapbox-gl-js/api/markers/#popup' + ], + + // Layers and Styling + layer: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/geojson-layer/', + 'https://docs.mapbox.com/mapbox-gl-js/example/data-driven-circle-colors/', + 'https://docs.mapbox.com/mapbox-gl-js/api/map/#map#addlayer', + 'https://docs.mapbox.com/style-spec/reference/layers/' + ], + style: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/setstyle/', + 'https://docs.mapbox.com/mapbox-gl-js/style-spec/', + 'https://docs.mapbox.com/mapbox-gl-js/api/map/#map#setstyle' + ], + + // Data Sources + source: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/geojson-line/', + 'https://docs.mapbox.com/mapbox-gl-js/example/live-update-feature/', + 'https://docs.mapbox.com/mapbox-gl-js/api/sources/' + ], + + // Events and Interaction + click: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/popup-on-click/', + 'https://docs.mapbox.com/mapbox-gl-js/example/queryrenderedfeatures/', + 'https://docs.mapbox.com/mapbox-gl-js/api/map/#map.event:click' + ], + hover: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/hover-styles/', + 'https://docs.mapbox.com/mapbox-gl-js/example/popup-on-hover/' + ], + + // Geocoding and Search + geocoding: [ + 'https://docs.mapbox.com/api/search/geocoding/', + 'https://docs.mapbox.com/mapbox-gl-js/example/mapbox-gl-geocoder/', + 'https://docs.mapbox.com/playground/geocoding/' + ], + search: [ + 
'https://docs.mapbox.com/api/search/search-box/', + 'https://docs.mapbox.com/playground/search-box/' + ], + + // Navigation and Directions + directions: [ + 'https://docs.mapbox.com/api/navigation/directions/', + 'https://docs.mapbox.com/playground/directions/' + ], + navigation: [ + 'https://docs.mapbox.com/ios/navigation/', + 'https://docs.mapbox.com/android/navigation/guides/' + ], + + // Controls + control: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/navigation/', + 'https://docs.mapbox.com/mapbox-gl-js/example/locate-user/', + 'https://docs.mapbox.com/mapbox-gl-js/api/markers/#navigationcontrol' + ], + + // Camera and Animation + camera: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/flyto/', + 'https://docs.mapbox.com/mapbox-gl-js/example/fitbounds/', + 'https://docs.mapbox.com/mapbox-gl-js/api/map/#map#flyto' + ], + + // 3D and Terrain + terrain: [ + 'https://docs.mapbox.com/mapbox-gl-js/example/add-terrain/', + 'https://docs.mapbox.com/mapbox-gl-js/example/3d-buildings/' + ], + + // Expressions + expression: [ + 'https://docs.mapbox.com/style-spec/reference/expressions/', + 'https://docs.mapbox.com/mapbox-gl-js/example/data-driven-circle-colors/' + ] + }; constructor(deps: { httpRequest: HttpRequest }) { super({ @@ -71,11 +173,113 @@ export class GetContextualDocsTool extends BaseTool< // Extract keywords from all provided context const extractedKeywords = this.extractKeywords(input); - // Fetch documentation - const docs = await this.fetchDocumentation(); + // Get curated pages based on keywords (high priority) + const curatedUrls = this.getCuratedPages(extractedKeywords); + + // Stage 1: Fetch documentation index + const docsIndex = await this.fetchDocumentation(); + + // Stage 2: Extract and score relevant URLs from index + const relevantUrls = this.extractRelevantUrls( + docsIndex, + extractedKeywords + ); + + // Combine curated URLs (high priority) with discovered URLs + const allUrlsToConsider = [ + ...curatedUrls.map((url) => ({ url, score: 
1.0 })), // Curated pages get max score + ...relevantUrls + ]; + + // Remove duplicates, keeping the highest score + const uniqueUrls = new Map(); + allUrlsToConsider.forEach(({ url, score }) => { + const existing = uniqueUrls.get(url); + if (!existing || score > existing) { + uniqueUrls.set(url, score); + } + }); + + // Stage 3: Fetch curated pages first + const allLinks: Array<{ url: string; score: number }> = []; + const allSections: Array<{ + title: string; + content: string; + url: string; + }> = []; + + // Fetch curated pages (always fetch these if matched) + for (const url of curatedUrls) { + try { + const html = await this.fetchHtmlPage(url); + const sections = this.extractHtmlContent(html, url); + allSections.push(...sections); + } catch (error) { + this.log('warning', `Failed to fetch curated page ${url}: ${error}`); + // Continue with other pages + } + } + + // Stage 4: Fetch top N pages from index (supplement curated pages) + const indexPagesToFetch = relevantUrls + .filter((p) => !curatedUrls.includes(p.url)) + .slice(0, this.MAX_PAGES_TO_FETCH); + + for (const { url } of indexPagesToFetch) { + try { + const html = await this.fetchHtmlPage(url); + + // Extract sections from this page + const sections = this.extractHtmlContent(html, url); + allSections.push(...sections); + + // Extract links from this page for further crawling + const links = this.extractLinksFromHtml(html, url, extractedKeywords); + allLinks.push(...links); + } catch (error) { + this.log('warning', `Failed to fetch ${url}: ${error}`); + // Continue with other pages + } + } + + // Stage 5: Fetch most relevant linked pages (if we still need more content) + const alreadyFetched = new Set([ + ...curatedUrls, + ...indexPagesToFetch.map((p) => p.url) + ]); + const linkedPagesToFetch = allLinks + .filter((link) => !alreadyFetched.has(link.url)) + .slice(0, this.MAX_LINKED_PAGES); + + for (const { url } of linkedPagesToFetch) { + try { + const html = await this.fetchHtmlPage(url); + const 
sections = this.extractHtmlContent(html, url); + allSections.push(...sections); + } catch (error) { + this.log('warning', `Failed to fetch linked page ${url}: ${error}`); + // Continue with other pages + } + } + + // If no HTML content was extracted, fall back to index content + if (allSections.length === 0) { + const indexSections = this.parseSections(docsIndex); + allSections.push( + ...indexSections.map((s) => ({ + title: s.title, + content: s.content, + url: s.url + })) + ); + } // Parse and score documentation sections - const results = this.findRelevantDocs(docs, input, extractedKeywords); + const results = this.findRelevantDocs( + allSections, + input, + extractedKeywords + ); // Generate suggestions const suggestedTopics = this.generateSuggestions( @@ -277,11 +481,272 @@ export class GetContextualDocsTool extends BaseTool< return content; } + /** + * Get curated page URLs for the keywords, matching each keyword's exact, singular, or plural form against the own keys of CURATED_PAGES; returns de-duplicated URLs + */ + private getCuratedPages(keywords: string[]): string[] { + const curatedUrls = new Set(); + + keywords.forEach((keyword) => { + const keywordLower = keyword.toLowerCase(); + + // Candidate keys: exact match first, then singular (drop trailing 's') + // or plural (append 's') + const candidates = keywordLower.endsWith('s') + ? [keywordLower, keywordLower.slice(0, -1)] + : [keywordLower, `${keywordLower}s`]; + + // Only own keys count: a keyword such as "constructor" would otherwise + // resolve to an inherited Object.prototype member (truthy, but not an + // array of URLs) and throw on the forEach call below + const matchedKey = candidates.find((key) => + Object.prototype.hasOwnProperty.call(this.CURATED_PAGES, key) + ); + const pages = + matchedKey === undefined ? undefined : this.CURATED_PAGES[matchedKey]; + + if (pages) { + pages.forEach((url) => curatedUrls.add(url)); + } + }); + + return Array.from(curatedUrls); + } + + /** + * Extract and score URLs from llms.txt index + */ + private extractRelevantUrls( + docs: string, + keywords: string[] + ): Array<{ url: string; score: number; context: string }> { + const lines = docs.split('\n'); + const urls: Array<{ url: string; score: number; context: string }> = []; + + let currentSection = ''; + for (const line of
lines) { + // Track section headers for context + if (line.startsWith('##')) { + currentSection = line.replace(/^##\s+/, '').trim(); + } + + // Extract URLs from markdown links + const urlMatch = line.match(/\[([^\]]+)\]\((https:\/\/[^)]+)\)/); + if (urlMatch) { + const [, linkText, url] = urlMatch; + const context = `${currentSection} - ${linkText}`; + + // Score URL based on keywords in context and URL + let score = 0; + const contextLower = context.toLowerCase(); + const urlLower = url.toLowerCase(); + + keywords.forEach((keyword) => { + if (contextLower.includes(keyword)) { + score += 0.3; + } + if (urlLower.includes(keyword)) { + score += 0.2; + } + }); + + // Boost for API reference and guide pages + if (url.includes('/api/') || url.includes('/guides/')) { + score += 0.1; + } + + if (score > 0) { + urls.push({ url, score, context }); + } + } + } + + // Sort by score and return top results + return urls.sort((a, b) => b.score - a.score); + } + + /** + * Fetch the raw HTML of a documentation page, cached for CACHE_TTL under its fragment-less URL; throws when the response is not ok + */ + private async fetchHtmlPage(url: string): Promise { + // Check cache. The #fragment is never sent to the server, so URLs that differ only by anchor share one entry and one request + const pageUrl = url.replace(/#.*$/, ''); + const cached = this.htmlPagesCache.get(pageUrl); + if (cached && Date.now() - cached.timestamp < this.CACHE_TTL) { + return cached.content; + } + + // Fetch page + const response = await this.httpRequest(pageUrl, { method: 'GET' }); + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.status}`); + } + + const content = await response.text(); + + // Update cache + this.htmlPagesCache.set(pageUrl, { + content, + timestamp: Date.now() + }); + + return content; + } + + /** + * Extract and score links from HTML page + */ + private extractLinksFromHtml( + html: string, + baseUrl: string, + keywords: string[] + ): Array<{ url: string; score: number; text: string }> { + try { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const { document } = parseHTML(html) as any; + const links: Array<{ url: string; score: number; text: string }> = []; + const seenUrls = new Set(); + +
// Find all links + const anchors = document.querySelectorAll('a[href]'); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + for (const anchor of Array.from(anchors) as any[]) { + const href = anchor.getAttribute('href'); + if (!href) continue; + + // Resolve relative URLs and drop the #fragment, so anchors into the same page collapse to one URL for de-duplication + let url: string; + try { + url = new URL(href, baseUrl).href.replace(/#.*$/, ''); + } catch { + continue; + } + + // Skip if already seen or not a docs.mapbox.com URL + if (seenUrls.has(url) || !url.startsWith('https://docs.mapbox.com/')) { + continue; + } + seenUrls.add(url); + + const linkText = anchor.textContent?.trim() || ''; + const urlLower = url.toLowerCase(); + const textLower = linkText.toLowerCase(); + + let score = 0; + + // Score based on keywords + keywords.forEach((keyword) => { + if (urlLower.includes(keyword)) { + score += 0.4; + } + if (textLower.includes(keyword)) { + score += 0.3; + } + }); + + // Boost for examples and API reference pages + if (url.includes('/example/')) { + score += 0.3; + } + if (url.includes('/api/')) { + score += 0.2; + } + if (url.includes('/guides/')) { + score += 0.2; + } + + // Skip low-scoring links + if (score > 0.2) { + links.push({ url, score, text: linkText }); + } + } + + // Sort by score + return links.sort((a, b) => b.score - a.score); + } catch (error) { + this.log('warning', `Failed to extract links from ${baseUrl}: ${error}`); + return []; + } + } + + /** + * Extract meaningful content from HTML page + */ + private extractHtmlContent( + html: string, + url: string + ): Array<{ + title: string; + content: string; + url: string; + }> { + try { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const { document } = parseHTML(html) as any; + const sections: Array<{ title: string; content: string; url: string }> = + []; + + // Try to find main content area + const mainContent = + document.querySelector('main') || + document.querySelector('article') || + document.querySelector('.content') || + document.querySelector('#content') ||
+ document.body; + + if (!mainContent) { + return []; + } + + // Extract sections based on headings + const headings = mainContent.querySelectorAll('h1, h2, h3'); + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + for (const heading of Array.from(headings) as any[]) { + const title = heading.textContent?.trim() || ''; + if (!title) continue; + + // Get content until next heading + let content = ''; + let currentElement = heading.nextElementSibling; + + while ( + currentElement && + !['H1', 'H2', 'H3'].includes(currentElement.tagName) + ) { + const text = currentElement.textContent?.trim(); + if (text) { + content += text + '\n\n'; + } + currentElement = currentElement.nextElementSibling; + } + + if (content.trim()) { + sections.push({ + title, + content: content.trim(), + url + }); + } + } + + return sections; + } catch (error) { + this.log('warning', `Failed to parse HTML from ${url}: ${error}`); + return []; + } + } + /** * Find relevant documentation sections */ private findRelevantDocs( - docs: string, + sections: Array<{ + title: string; + content: string; + url: string; + }>, input: z.infer, keywords: string[] ): Array<{ @@ -292,13 +757,23 @@ export class GetContextualDocsTool extends BaseTool< relevanceScore: number; matchReason?: string; }> { - const sections = this.parseSections(docs); const scoredSections = sections .map((section) => { - const score = this.calculateRelevance(section, keywords, input); + const score = this.calculateRelevance( + { + title: section.title, + content: section.content, + category: this.categorizeSection(section.title, section.content) + }, + keywords, + input + ); const reason = this.explainMatch(section, keywords, input); return { - ...section, + title: section.title, + excerpt: this.extractExcerpt(section.content), + category: this.categorizeSection(section.title, section.content), + url: section.url, relevanceScore: score, matchReason: reason }; From f9189333310e8a5e14efbc2724a92de37ea253e8 Mon Sep 17 
00:00:00 2001 From: Matthew Podwysocki Date: Fri, 13 Feb 2026 15:49:02 -0500 Subject: [PATCH 3/3] Improve HTML parsing for Docusaurus pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Docusaurus-specific selectors (article, [id*="docs-content"], .markdown) - Extract meta descriptions as fallback - Add paragraph/list/code extraction when headings are sparse - Support H4 headings - Add content length limits (2000 chars for paragraph extraction) - Better fallback chain: headings → paragraphs → meta description This fixes content extraction from Mapbox example pages which use Docusaurus and don't have many section headings. Now queries like "popup on hover" work without needing web search fallback. Co-Authored-By: Claude Sonnet 4.5 --- .../GetContextualDocsTool.ts | 101 ++++++++++++++---- test-keywords.js | 29 +++++ 2 files changed, 107 insertions(+), 23 deletions(-) create mode 100644 test-keywords.js diff --git a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts index 2fb0798..7618c7a 100644 --- a/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts +++ b/src/tools/get-contextual-docs-tool/GetContextualDocsTool.ts @@ -687,45 +687,100 @@ export class GetContextualDocsTool extends BaseTool< const sections: Array<{ title: string; content: string; url: string }> = []; - // Try to find main content area + // Extract meta description as fallback + const metaDescription = + document + .querySelector('meta[name="description"]') + ?.getAttribute('content') || ''; + const pageTitle = + document.querySelector('title')?.textContent?.trim() || + url.split('/').pop() || + 'Documentation'; + + // Try to find main content area (Docusaurus-specific selectors first) const mainContent = - document.querySelector('main') || document.querySelector('article') || + document.querySelector('[id*="docs-content"]') || + document.querySelector('.markdown') || + 
document.querySelector('main') || document.querySelector('.content') || document.querySelector('#content') || document.body; if (!mainContent) { + // Return meta description as fallback + if (metaDescription) { + return [ + { + title: pageTitle, + content: metaDescription, + url + } + ]; + } return []; } // Extract sections based on headings - const headings = mainContent.querySelectorAll('h1, h2, h3'); + const headings = mainContent.querySelectorAll('h1, h2, h3, h4'); + const headingArray = Array.from(headings) as any[]; // eslint-disable-line @typescript-eslint/no-explicit-any + + if (headingArray.length > 0) { + // Extract content by heading sections + for (const heading of headingArray) { + const title = heading.textContent?.trim() || ''; + if (!title) continue; + + // Get content until next heading + let content = ''; + let currentElement = heading.nextElementSibling; + + while ( + currentElement && + !['H1', 'H2', 'H3', 'H4'].includes(currentElement.tagName) + ) { + const text = currentElement.textContent?.trim(); + if (text) { + content += text + '\n\n'; + } + currentElement = currentElement.nextElementSibling; + } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - for (const heading of Array.from(headings) as any[]) { - const title = heading.textContent?.trim() || ''; - if (!title) continue; - - // Get content until next heading - let content = ''; - let currentElement = heading.nextElementSibling; - - while ( - currentElement && - !['H1', 'H2', 'H3'].includes(currentElement.tagName) - ) { - const text = currentElement.textContent?.trim(); - if (text) { - content += text + '\n\n'; + if (content.trim()) { + sections.push({ + title, + content: content.trim(), + url + }); + } + } + } + + // If no heading-based sections found, extract all paragraphs and create one section + if (sections.length === 0) { + const paragraphs = mainContent.querySelectorAll('p, li, code, pre'); + let allContent = ''; + + // eslint-disable-next-line 
@typescript-eslint/no-explicit-any + for (const para of Array.from(paragraphs) as any[]) { + const text = para.textContent?.trim(); + if (text && text.length > 20) { + // Skip very short snippets + allContent += text + '\n\n'; } - currentElement = currentElement.nextElementSibling; } - if (content.trim()) { + if (allContent.trim()) { + sections.push({ + title: pageTitle, + content: allContent.trim().substring(0, 2000), // Limit to 2000 chars + url + }); + } else if (metaDescription) { + // Final fallback to meta description sections.push({ - title, - content: content.trim(), + title: pageTitle, + content: metaDescription, url }); } diff --git a/test-keywords.js b/test-keywords.js new file mode 100644 index 0000000..f8f9cff --- /dev/null +++ b/test-keywords.js @@ -0,0 +1,29 @@ +// Quick test to see what keywords would be extracted +const context = + 'Creating popups that appear on hover over features or markers in Mapbox GL JS'; + +const stopWords = new Set([ + 'the', + 'and', + 'for', + 'with', + 'this', + 'that', + 'from', + 'have', + 'has', + 'can', + 'will', + 'what', + 'how', + 'when', + 'where', + 'why' +]); + +const contextWords = context.toLowerCase().match(/\b[a-z]{3,}\b/g) || []; +const keywords = contextWords.filter((word) => !stopWords.has(word)); + +console.log('Extracted keywords:', keywords); +console.log('Should match "hover":', keywords.includes('hover')); +console.log('Should match "popups":', keywords.includes('popups'));