From e98833868dd5ca300158a1469fbd37e954eaa487 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 4 Apr 2026 14:43:06 +0000 Subject: [PATCH 01/19] Add AI-powered PostgreSQL assistant CLI tool - app.py: Main CLI loop with rich terminal output, argument parsing - llm_client.py: Ollama API client for LLM communication - mcp_client.py: MCP PostgreSQL server client for query execution - sql_generator.py: Prompt engineering, SQL extraction, and safety validation - requirements.txt: Python dependencies (requests, rich) - README.md: Architecture docs, usage examples, installation instructions Features: - Natural language to SQL via Ollama (codellama model) - Schema-aware prompt engineering - SQL safety enforcement (SELECT-only, blocks dangerous keywords) - Retry logic for failed SQL generation - Rich formatted output with timing metrics - Interactive CLI commands (help, schema, clear, exit) --- tools/pg-assistant/README.md | 117 ++++++++++ tools/pg-assistant/app.py | 326 ++++++++++++++++++++++++++++ tools/pg-assistant/llm_client.py | 115 ++++++++++ tools/pg-assistant/mcp_client.py | 174 +++++++++++++++ tools/pg-assistant/requirements.txt | 2 + tools/pg-assistant/sql_generator.py | 235 ++++++++++++++++++++ 6 files changed, 969 insertions(+) create mode 100644 tools/pg-assistant/README.md create mode 100644 tools/pg-assistant/app.py create mode 100644 tools/pg-assistant/llm_client.py create mode 100644 tools/pg-assistant/mcp_client.py create mode 100644 tools/pg-assistant/requirements.txt create mode 100644 tools/pg-assistant/sql_generator.py diff --git a/tools/pg-assistant/README.md b/tools/pg-assistant/README.md new file mode 100644 index 0000000..dda1004 --- /dev/null +++ b/tools/pg-assistant/README.md @@ -0,0 +1,117 @@ +# pg-assistant — AI-Powered PostgreSQL CLI + +A production-ready Python CLI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them against 
PostgreSQL via an MCP server. + +## Architecture + +``` +User Question (natural language) + │ + ▼ +┌──────────────────┐ +│ sql_generator │ ← Prompt engineering + safety validation +│ │ +│ ┌────────────┐ │ +│ │ llm_client │──┼──→ Ollama API (codellama) +│ └────────────┘ │ +└────────┬─────────┘ + │ validated SELECT query + ▼ +┌──────────────────┐ +│ mcp_client │──→ MCP PostgreSQL Server +└──────────────────┘ + │ + ▼ + Formatted Results (rich tables) +``` + +| Module | Responsibility | +|-------------------|--------------------------------------------------| +| `app.py` | CLI loop, argument parsing, rich output | +| `llm_client.py` | Ollama API communication | +| `mcp_client.py` | MCP PostgreSQL server communication | +| `sql_generator.py` | Prompt engineering, SQL extraction, safety checks | + +## Prerequisites + +- **Python 3.10+** +- **Ollama** running locally with the `codellama` model pulled: + ```bash + ollama serve & + ollama pull codellama + ``` +- **MCP PostgreSQL server** running on `http://localhost:3000` +- **PostgreSQL** with `pg_stat_statements` enabled + +## Installation + +```bash +cd tools/pg-assistant +pip install -r requirements.txt +``` + +## Usage + +```bash +# Basic usage (defaults: Ollama on :11434, MCP on :3000) +python app.py + +# Custom endpoints +python app.py --ollama-url http://localhost:11434 --mcp-url http://localhost:3000 + +# Use a different model +python app.py --model mistral + +# Verbose/debug logging +python app.py -v + +# Specify a PostgreSQL schema +python app.py --schema my_schema +``` + +### CLI Commands + +| Command | Description | +|------------|--------------------------------------| +| `help` | Show available commands and examples | +| `schema` | Refresh and display database schema | +| `clear` | Clear the terminal screen | +| `exit` | Quit the application | + +### Example Session + +``` +pg-assistant> Show me the top 5 largest tables + +┌─────────────────────────────────────────────────┐ +│ Generated SQL │ 
+├─────────────────────────────────────────────────┤ +│ SELECT schemaname, relname, n_live_tup │ +│ FROM pg_stat_user_tables │ +│ ORDER BY n_live_tup DESC │ +│ LIMIT 5; │ +└─────────────────────────────────────────────────┘ + +┌─────────────┬──────────┬────────────┐ +│ schemaname │ relname │ n_live_tup │ +├─────────────┼──────────┼────────────┤ +│ public │ orders │ 1000000 │ +│ public │ users │ 500000 │ +│ ... │ ... │ ... │ +└─────────────┴──────────┴────────────┘ +5 row(s) returned in 42ms +``` + +## SQL Safety + +The assistant enforces **read-only access** by: + +1. Blocking dangerous keywords: `DROP`, `DELETE`, `TRUNCATE`, `UPDATE`, `INSERT`, `ALTER`, `CREATE`, `GRANT`, `REVOKE`, `EXEC`, `EXECUTE` +2. Requiring queries to start with `SELECT` or `WITH` (CTEs) +3. Stripping string literals before keyword scanning to avoid false positives + +## Schema Awareness + +On startup, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy. + +Refresh the schema at any time with the `schema` command. diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py new file mode 100644 index 0000000..d1437f6 --- /dev/null +++ b/tools/pg-assistant/app.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +"""AI-powered PostgreSQL assistant CLI application. + +Converts natural language questions into SQL queries using a local LLM (Ollama) +and executes them against a PostgreSQL database via an MCP server. 
+""" + +import argparse +import logging +import sys +import time + +from rich.console import Console +from rich.logging import RichHandler +from rich.panel import Panel +from rich.table import Table +from rich.text import Text + +from llm_client import LLMClient +from mcp_client import MCPClient +from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError + +console = Console() + +HELP_TEXT = """ +[bold cyan]Available Commands:[/bold cyan] + + [green]exit[/green] / [green]quit[/green] Quit the application + [green]help[/green] Show this help message + [green]schema[/green] Refresh and display the database schema + [green]clear[/green] Clear the terminal screen + +[bold cyan]Example Questions:[/bold cyan] + + • Show me all tables in the database + • What are the top 10 largest tables by row count? + • List all active connections to the database + • Show the slowest queries from pg_stat_statements + • What indexes exist on the users table? + • Show me the table structure for the orders table +""" + +BANNER = r""" +[bold cyan]╔══════════════════════════════════════════════════╗ +║ AI PostgreSQL Assistant (pg-assistant) ║ +║ Natural Language → SQL via Ollama + MCP Server ║ +╚══════════════════════════════════════════════════╝[/bold cyan] +""" + + +def setup_logging(verbose: bool = False) -> None: + """Configure logging with rich handler.""" + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + level=level, + format="%(message)s", + datefmt="[%X]", + handlers=[RichHandler(console=console, rich_tracebacks=True, show_path=False)], + ) + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="AI-powered PostgreSQL assistant using Ollama and MCP", + ) + parser.add_argument( + "--ollama-url", + default="http://localhost:11434", + help="Ollama server URL (default: http://localhost:11434)", + ) + parser.add_argument( + "--mcp-url", + 
default="http://localhost:3000", + help="MCP PostgreSQL server URL (default: http://localhost:3000)", + ) + parser.add_argument( + "--model", + default="codellama", + help="Ollama model name (default: codellama)", + ) + parser.add_argument( + "--schema", + default="public", + help="PostgreSQL schema to use for context (default: public)", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose/debug logging", + ) + return parser.parse_args() + + +def check_services(llm_client: LLMClient, mcp_client: MCPClient) -> bool: + """Verify that Ollama and MCP services are reachable.""" + all_ok = True + + with console.status("[bold yellow]Checking Ollama server..."): + if llm_client.health_check(): + console.print(" [green]✓[/green] Ollama server is reachable") + models = llm_client.list_models() + if models: + model_names = [m.get("name", "unknown") for m in models] + console.print(f" Available models: {', '.join(model_names)}") + else: + console.print( + f" [red]✗[/red] Cannot reach Ollama at {llm_client.base_url}" + ) + all_ok = False + + with console.status("[bold yellow]Checking MCP server..."): + if mcp_client.health_check(): + console.print(" [green]✓[/green] MCP PostgreSQL server is reachable") + else: + console.print( + f" [red]✗[/red] Cannot reach MCP server at {mcp_client.base_url}" + ) + all_ok = False + + return all_ok + + +def load_schema( + mcp_client: MCPClient, + sql_generator: SQLGenerator, + schema_name: str, +) -> None: + """Load and display database schema metadata.""" + with console.status("[bold yellow]Loading database schema..."): + schema = mcp_client.get_schema(schema_name) + + if schema: + sql_generator.update_schema(schema) + display_schema(schema) + else: + console.print( + "[yellow]⚠ Could not load schema metadata. 
" + "SQL generation will proceed without schema context.[/yellow]" + ) + + +def display_schema(schema: dict) -> None: + """Render the database schema as a rich table.""" + table = Table( + title="Database Schema", + show_header=True, + header_style="bold magenta", + ) + table.add_column("Table", style="cyan", no_wrap=True) + table.add_column("Column", style="green") + table.add_column("Type", style="yellow") + table.add_column("Nullable", style="dim") + + for table_name, columns in schema.items(): + for i, col in enumerate(columns): + table.add_row( + table_name if i == 0 else "", + col["column_name"], + col["data_type"], + col["is_nullable"], + ) + table.add_section() + + console.print(table) + + +def display_results(result: dict) -> None: + """Render query results as a rich table.""" + if "error" in result: + console.print(f"\n[red]Query Error:[/red] {result['error']}") + return + + columns = result.get("columns", []) + rows = result.get("rows", []) + row_count = result.get("row_count", len(rows)) + elapsed_ms = result.get("elapsed_ms", 0) + + if not rows: + console.print("\n[yellow]Query returned no results.[/yellow]") + return + + table = Table( + title="Query Results", + show_header=True, + header_style="bold magenta", + show_lines=True, + ) + + # Determine column names + if columns: + col_names = columns + elif rows and isinstance(rows[0], dict): + col_names = list(rows[0].keys()) + else: + col_names = [f"col_{i}" for i in range(len(rows[0]) if rows else 0)] + + for col_name in col_names: + table.add_column(str(col_name), style="cyan", overflow="fold") + + for row in rows: + if isinstance(row, dict): + table.add_row(*[str(v) if v is not None else "NULL" for v in row.values()]) + elif isinstance(row, (list, tuple)): + table.add_row(*[str(v) if v is not None else "NULL" for v in row]) + + console.print(table) + console.print(f"\n[dim]{row_count} row(s) returned in {elapsed_ms}ms[/dim]") + + +def process_query( + user_input: str, + sql_generator: SQLGenerator, + 
mcp_client: MCPClient, +) -> None: + """Process a natural language query end-to-end.""" + # Step 1: Generate SQL + console.print() + with console.status("[bold yellow]Generating SQL..."): + start_gen = time.monotonic() + try: + sql = sql_generator.generate_sql(user_input) + except UnsafeSQLError as exc: + console.print(f"\n[red]Safety Block:[/red] {exc}") + return + except SQLGenerationError as exc: + console.print(f"\n[red]Generation Error:[/red] {exc}") + return + gen_elapsed = time.monotonic() - start_gen + + # Step 2: Display generated SQL + console.print( + Panel( + Text(sql, style="green"), + title="[bold]Generated SQL[/bold]", + subtitle=f"[dim]generated in {gen_elapsed:.2f}s[/dim]", + border_style="blue", + ) + ) + + # Step 3: Execute SQL + with console.status("[bold yellow]Executing query..."): + start_exec = time.monotonic() + try: + result = mcp_client.execute_query(sql) + except (ConnectionError, RuntimeError) as exc: + console.print(f"\n[red]Execution Error:[/red] {exc}") + return + exec_elapsed = time.monotonic() - start_exec + + # Step 4: Display results + if "elapsed_ms" not in result: + result["elapsed_ms"] = round(exec_elapsed * 1000, 2) + + display_results(result) + + +def main() -> None: + """Main CLI entry point.""" + args = parse_args() + setup_logging(verbose=args.verbose) + + console.print(BANNER) + + # Initialize clients + llm_client = LLMClient( + base_url=args.ollama_url, + model=args.model, + ) + mcp_client = MCPClient(base_url=args.mcp_url) + sql_generator = SQLGenerator(llm_client=llm_client) + + # Check service connectivity + if not check_services(llm_client, mcp_client): + console.print( + "\n[bold red]Some services are not available. 
" + "Please ensure Ollama and MCP server are running.[/bold red]" + ) + console.print( + "[dim]Continuing anyway — errors will appear when you submit queries.[/dim]" + ) + + # Load schema + load_schema(mcp_client, sql_generator, args.schema) + + console.print( + '\n[dim]Type a natural language question, or "help" for commands.[/dim]\n' + ) + + # Main REPL loop + while True: + try: + user_input = console.input( + "[bold green]pg-assistant>[/bold green] " + ).strip() + except (KeyboardInterrupt, EOFError): + console.print("\n[dim]Goodbye![/dim]") + sys.exit(0) + + if not user_input: + continue + + command = user_input.lower() + + if command in ("exit", "quit"): + console.print("[dim]Goodbye![/dim]") + sys.exit(0) + + if command == "help": + console.print(HELP_TEXT) + continue + + if command == "schema": + load_schema(mcp_client, sql_generator, args.schema) + continue + + if command == "clear": + console.clear() + continue + + process_query(user_input, sql_generator, mcp_client) + + +if __name__ == "__main__": + main() diff --git a/tools/pg-assistant/llm_client.py b/tools/pg-assistant/llm_client.py new file mode 100644 index 0000000..bb34846 --- /dev/null +++ b/tools/pg-assistant/llm_client.py @@ -0,0 +1,115 @@ +"""LLM client module for communicating with Ollama API.""" + +import logging +import time +from typing import Optional + +import requests + +logger = logging.getLogger(__name__) + +DEFAULT_OLLAMA_URL = "http://localhost:11434" +DEFAULT_MODEL = "codellama" +DEFAULT_TIMEOUT = 120 + + +class LLMClient: + """Client for interacting with the Ollama LLM API.""" + + def __init__( + self, + base_url: str = DEFAULT_OLLAMA_URL, + model: str = DEFAULT_MODEL, + timeout: int = DEFAULT_TIMEOUT, + ) -> None: + self.base_url = base_url.rstrip("/") + self.model = model + self.timeout = timeout + self.generate_url = f"{self.base_url}/api/generate" + + def generate(self, prompt: str, system_prompt: str = "") -> str: + """Send a prompt to Ollama and return the generated text. 
+ + Args: + prompt: The user prompt to send. + system_prompt: Optional system-level instruction. + + Returns: + The generated text response. + + Raises: + ConnectionError: If the Ollama server is unreachable. + RuntimeError: If the API returns an error. + """ + payload: dict = { + "model": self.model, + "prompt": prompt, + "stream": False, + } + if system_prompt: + payload["system"] = system_prompt + + logger.debug("Sending request to Ollama: model=%s", self.model) + start = time.monotonic() + + try: + response = requests.post( + self.generate_url, + json=payload, + timeout=self.timeout, + ) + except requests.ConnectionError as exc: + logger.error("Cannot reach Ollama at %s", self.base_url) + raise ConnectionError( + f"Cannot connect to Ollama at {self.base_url}. " + "Ensure Ollama is running (ollama serve)." + ) from exc + except requests.Timeout as exc: + logger.error("Ollama request timed out after %ds", self.timeout) + raise RuntimeError( + f"Ollama request timed out after {self.timeout}s." + ) from exc + + elapsed = time.monotonic() - start + logger.debug("Ollama responded in %.2fs", elapsed) + + if response.status_code != 200: + error_detail = response.text[:500] + logger.error("Ollama API error %d: %s", response.status_code, error_detail) + raise RuntimeError( + f"Ollama API returned status {response.status_code}: {error_detail}" + ) + + data = response.json() + generated_text: str = data.get("response", "").strip() + + if not generated_text: + logger.warning("Ollama returned an empty response") + + return generated_text + + def health_check(self) -> bool: + """Check whether the Ollama server is reachable. + + Returns: + True if the server responds, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/", timeout=5) + return response.status_code == 200 + except (requests.ConnectionError, requests.Timeout): + return False + + def list_models(self) -> Optional[list]: + """List available models on the Ollama server. 
+ + Returns: + A list of model info dicts, or None on failure. + """ + try: + response = requests.get(f"{self.base_url}/api/tags", timeout=10) + if response.status_code == 200: + return response.json().get("models", []) + except (requests.ConnectionError, requests.Timeout): + pass + return None diff --git a/tools/pg-assistant/mcp_client.py b/tools/pg-assistant/mcp_client.py new file mode 100644 index 0000000..e4bc230 --- /dev/null +++ b/tools/pg-assistant/mcp_client.py @@ -0,0 +1,174 @@ +"""MCP (Model Context Protocol) client for PostgreSQL server communication.""" + +import logging +import time +from typing import Any, Optional + +import requests + +logger = logging.getLogger(__name__) + +DEFAULT_MCP_URL = "http://localhost:3000" +DEFAULT_TIMEOUT = 30 + + +class MCPClient: + """Client for interacting with the MCP PostgreSQL server.""" + + def __init__( + self, + base_url: str = DEFAULT_MCP_URL, + timeout: int = DEFAULT_TIMEOUT, + ) -> None: + self.base_url = base_url.rstrip("/") + self.timeout = timeout + + def execute_query(self, sql: str) -> dict[str, Any]: + """Execute a SQL query via the MCP server. + + Args: + sql: The SQL query string to execute. + + Returns: + A dict with keys 'columns' and 'rows' on success, + or 'error' on failure. + + Raises: + ConnectionError: If the MCP server is unreachable. + RuntimeError: If the MCP server returns an error response. + """ + payload = { + "method": "query", + "params": {"sql": sql}, + } + + logger.debug("Sending query to MCP server: %s", sql[:200]) + start = time.monotonic() + + try: + response = requests.post( + self.base_url, + json=payload, + timeout=self.timeout, + ) + except requests.ConnectionError as exc: + logger.error("Cannot reach MCP server at %s", self.base_url) + raise ConnectionError( + f"Cannot connect to MCP server at {self.base_url}. " + "Ensure the MCP PostgreSQL server is running." 
+ ) from exc + except requests.Timeout as exc: + logger.error("MCP request timed out after %ds", self.timeout) + raise RuntimeError( + f"MCP server request timed out after {self.timeout}s." + ) from exc + + elapsed = time.monotonic() - start + logger.debug("MCP server responded in %.2fs", elapsed) + + if response.status_code != 200: + error_detail = response.text[:500] + logger.error("MCP server error %d: %s", response.status_code, error_detail) + raise RuntimeError( + f"MCP server returned status {response.status_code}: {error_detail}" + ) + + data: dict[str, Any] = response.json() + + if "error" in data: + error_msg = data["error"] + logger.warning("MCP query error: %s", error_msg) + return {"error": str(error_msg)} + + return { + "columns": data.get("columns", []), + "rows": data.get("rows", []), + "row_count": data.get("rowCount", len(data.get("rows", []))), + "elapsed_ms": round(elapsed * 1000, 2), + } + + def get_schema(self, schema_name: str = "public") -> Optional[dict[str, Any]]: + """Retrieve database schema metadata via the MCP server. + + Args: + schema_name: The PostgreSQL schema to inspect. + + Returns: + Schema metadata dict, or None on failure. + """ + sql = f""" + SELECT + t.table_name, + c.column_name, + c.data_type, + c.is_nullable, + c.column_default + FROM information_schema.tables t + JOIN information_schema.columns c + ON t.table_name = c.table_name + AND t.table_schema = c.table_schema + WHERE t.table_schema = '{schema_name}' + AND t.table_type = 'BASE TABLE' + ORDER BY t.table_name, c.ordinal_position; + """ + try: + result = self.execute_query(sql) + if "error" in result: + logger.warning("Failed to fetch schema: %s", result["error"]) + return None + return self._parse_schema(result) + except (ConnectionError, RuntimeError) as exc: + logger.warning("Failed to fetch schema: %s", exc) + return None + + def health_check(self) -> bool: + """Check whether the MCP server is reachable. + + Returns: + True if the server responds, False otherwise. 
+ """ + try: + response = requests.get(self.base_url, timeout=5) + return response.status_code < 500 + except (requests.ConnectionError, requests.Timeout): + return False + + @staticmethod + def _parse_schema(result: dict[str, Any]) -> dict[str, list[dict[str, str]]]: + """Parse raw schema query results into a structured dict. + + Args: + result: The raw query result from execute_query. + + Returns: + A dict mapping table names to lists of column info dicts. + """ + schema: dict[str, list[dict[str, str]]] = {} + columns = result.get("columns", []) + rows = result.get("rows", []) + + for row in rows: + if isinstance(row, dict): + table = row.get("table_name", "") + col_info = { + "column_name": row.get("column_name", ""), + "data_type": row.get("data_type", ""), + "is_nullable": row.get("is_nullable", ""), + "column_default": row.get("column_default", ""), + } + elif isinstance(row, (list, tuple)) and len(columns) >= 5: + table = str(row[0]) + col_info = { + "column_name": str(row[1]), + "data_type": str(row[2]), + "is_nullable": str(row[3]), + "column_default": str(row[4]) if row[4] else "", + } + else: + continue + + if table not in schema: + schema[table] = [] + schema[table].append(col_info) + + return schema diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt new file mode 100644 index 0000000..8f33262 --- /dev/null +++ b/tools/pg-assistant/requirements.txt @@ -0,0 +1,2 @@ +requests>=2.31.0,<3.0.0 +rich>=13.7.0,<15.0.0 diff --git a/tools/pg-assistant/sql_generator.py b/tools/pg-assistant/sql_generator.py new file mode 100644 index 0000000..42dd313 --- /dev/null +++ b/tools/pg-assistant/sql_generator.py @@ -0,0 +1,235 @@ +"""SQL generation module with prompt engineering and safety validation.""" + +import logging +import re +from typing import Any, Optional + +from llm_client import LLMClient + +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT = ( + "You are a PostgreSQL expert. 
You receive natural language questions about " + "a PostgreSQL database and return ONLY valid SQL SELECT queries. " + "Rules:\n" + "- Return ONLY the SQL query, nothing else.\n" + "- Do NOT include explanations, comments, or markdown formatting.\n" + "- Do NOT use DROP, DELETE, TRUNCATE, UPDATE, INSERT, ALTER, CREATE, or GRANT.\n" + "- Only generate SELECT statements.\n" + "- Always terminate the query with a semicolon.\n" + "- If the question cannot be answered with a SELECT query, respond with: " + "-- CANNOT_GENERATE" +) + +DANGEROUS_KEYWORDS = frozenset( + { + "DROP", + "DELETE", + "TRUNCATE", + "UPDATE", + "INSERT", + "ALTER", + "CREATE", + "GRANT", + "REVOKE", + "EXEC", + "EXECUTE", + } +) + +MAX_RETRIES = 2 + + +class SQLGenerationError(Exception): + """Raised when SQL generation fails after retries.""" + + +class UnsafeSQLError(Exception): + """Raised when generated SQL contains dangerous operations.""" + + +class SQLGenerator: + """Generates safe SQL queries from natural language using an LLM.""" + + def __init__( + self, + llm_client: LLMClient, + schema_metadata: Optional[dict[str, Any]] = None, + ) -> None: + self.llm_client = llm_client + self.schema_metadata = schema_metadata + + def update_schema(self, schema_metadata: dict[str, Any]) -> None: + """Update the schema metadata used for prompt context. + + Args: + schema_metadata: Dict mapping table names to column info lists. + """ + self.schema_metadata = schema_metadata + logger.info("Schema metadata updated: %d tables", len(schema_metadata)) + + def generate_sql(self, user_query: str) -> str: + """Generate a SQL query from a natural language question. + + Retries up to MAX_RETRIES times if validation fails. + + Args: + user_query: The natural language question. + + Returns: + A validated SQL SELECT query string. + + Raises: + SQLGenerationError: If generation fails after all retries. + UnsafeSQLError: If the generated SQL is unsafe. 
+ """ + prompt = self._build_prompt(user_query) + + last_error: Optional[str] = None + for attempt in range(1, MAX_RETRIES + 1): + logger.info("SQL generation attempt %d/%d", attempt, MAX_RETRIES) + + retry_prompt = prompt + if last_error and attempt > 1: + retry_prompt += ( + f"\n\nPrevious attempt failed with error: {last_error}\n" + "Please generate a corrected SQL query." + ) + + try: + raw_response = self.llm_client.generate( + prompt=retry_prompt, + system_prompt=SYSTEM_PROMPT, + ) + except (ConnectionError, RuntimeError) as exc: + logger.error("LLM request failed: %s", exc) + raise SQLGenerationError( + f"Failed to communicate with LLM: {exc}" + ) from exc + + sql = self._extract_sql(raw_response) + + if not sql or sql == "-- CANNOT_GENERATE": + last_error = "LLM could not generate a valid query" + logger.warning("Attempt %d: %s", attempt, last_error) + continue + + try: + self._validate_sql(sql) + return sql + except UnsafeSQLError: + raise + except ValueError as exc: + last_error = str(exc) + logger.warning("Attempt %d validation failed: %s", attempt, last_error) + continue + + raise SQLGenerationError( + f"Failed to generate valid SQL after {MAX_RETRIES} attempts. " + f"Last error: {last_error}" + ) + + def _build_prompt(self, user_query: str) -> str: + """Build the full prompt including schema context. + + Args: + user_query: The natural language question. + + Returns: + The complete prompt string. 
+ """ + parts = [] + + if self.schema_metadata: + parts.append("Database schema:") + for table_name, columns in self.schema_metadata.items(): + col_defs = [] + for col in columns: + nullable = "NULL" if col["is_nullable"] == "YES" else "NOT NULL" + default = ( + f" DEFAULT {col['column_default']}" + if col.get("column_default") + else "" + ) + col_defs.append( + f" {col['column_name']} {col['data_type']} {nullable}{default}" + ) + parts.append(f"TABLE {table_name} (\n" + ",\n".join(col_defs) + "\n)") + parts.append("") + + parts.append(f"Question: {user_query}") + parts.append("SQL:") + + return "\n".join(parts) + + @staticmethod + def _extract_sql(raw_response: str) -> str: + """Extract clean SQL from the LLM response. + + Strips markdown code blocks, comments, and extra whitespace. + + Args: + raw_response: The raw LLM output. + + Returns: + A cleaned SQL string. + """ + text = raw_response.strip() + + # Remove markdown code fences + code_block_match = re.search( + r"```(?:sql)?\s*\n?(.*?)\n?```", text, re.DOTALL | re.IGNORECASE + ) + if code_block_match: + text = code_block_match.group(1).strip() + + # Remove leading/trailing comments + lines = [] + for line in text.split("\n"): + stripped = line.strip() + if stripped and not stripped.startswith("--"): + lines.append(line) + elif stripped == "-- CANNOT_GENERATE": + return "-- CANNOT_GENERATE" + + sql = "\n".join(lines).strip() + + # Ensure trailing semicolon + if sql and not sql.endswith(";"): + sql += ";" + + return sql + + @staticmethod + def _validate_sql(sql: str) -> None: + """Validate that the SQL is a safe SELECT query. + + Args: + sql: The SQL query to validate. + + Raises: + UnsafeSQLError: If the query contains dangerous keywords. + ValueError: If the query is not a valid SELECT statement. 
+ """ + if not sql: + raise ValueError("Empty SQL query") + + normalized = sql.upper().strip() + + # Remove string literals to avoid false positives on keywords inside quotes + sanitized = re.sub(r"'[^']*'", "''", normalized) + + # Check for dangerous keywords as standalone words + for keyword in DANGEROUS_KEYWORDS: + pattern = rf"\b{keyword}\b" + if re.search(pattern, sanitized): + raise UnsafeSQLError( + f"Unsafe SQL detected: query contains '{keyword}'. " + "Only SELECT queries are allowed." + ) + + # Verify it starts with SELECT or WITH (for CTEs) + if not (sanitized.startswith("SELECT") or sanitized.startswith("WITH")): + raise ValueError( + f"Query must start with SELECT or WITH. Got: {sql[:50]}..." + ) From 86aa9c77422b931627059d4255d02d89892137c5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 4 Apr 2026 17:06:16 +0000 Subject: [PATCH 02/19] Refactor pg-assistant: Streamlit UI, direct PostgreSQL, connection profiles - Replace CLI (app.py) with Streamlit web UI - Replace MCP client with direct PostgreSQL connection via psycopg2 (db_client.py) - Add connection profile manager for save/load DB configs (profile_manager.py) - Update requirements.txt with streamlit, psycopg2-binary, pandas - Update README with new architecture and usage docs - Keep llm_client.py and sql_generator.py unchanged --- tools/pg-assistant/README.md | 95 ++-- tools/pg-assistant/app.py | 632 +++++++++++++------------- tools/pg-assistant/db_client.py | 174 +++++++ tools/pg-assistant/profile_manager.py | 113 +++++ tools/pg-assistant/requirements.txt | 4 +- 5 files changed, 658 insertions(+), 360 deletions(-) create mode 100644 tools/pg-assistant/db_client.py create mode 100644 tools/pg-assistant/profile_manager.py diff --git a/tools/pg-assistant/README.md b/tools/pg-assistant/README.md index dda1004..7d240c9 100644 --- a/tools/pg-assistant/README.md +++ b/tools/pg-assistant/README.md @@ -1,6 +1,6 @@ -# pg-assistant — 
AI-Powered PostgreSQL CLI +# pg-assistant — AI-Powered PostgreSQL Assistant -A production-ready Python CLI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them against PostgreSQL via an MCP server. +A Streamlit web UI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them directly against PostgreSQL. Includes connection profile management for saving and loading database configurations. ## Architecture @@ -18,19 +18,20 @@ User Question (natural language) │ validated SELECT query ▼ ┌──────────────────┐ -│ mcp_client │──→ MCP PostgreSQL Server +│ db_client │──→ PostgreSQL (direct via psycopg2) └──────────────────┘ │ ▼ - Formatted Results (rich tables) + Streamlit Web UI (tables, charts, CSV export) ``` -| Module | Responsibility | -|-------------------|--------------------------------------------------| -| `app.py` | CLI loop, argument parsing, rich output | -| `llm_client.py` | Ollama API communication | -| `mcp_client.py` | MCP PostgreSQL server communication | -| `sql_generator.py` | Prompt engineering, SQL extraction, safety checks | +| Module | Responsibility | +|---------------------|---------------------------------------------------| +| `app.py` | Streamlit web UI | +| `llm_client.py` | Ollama API communication | +| `db_client.py` | Direct PostgreSQL connection via psycopg2 | +| `sql_generator.py` | Prompt engineering, SQL extraction, safety checks | +| `profile_manager.py`| Save / load database connection profiles (JSON) | ## Prerequisites @@ -40,8 +41,7 @@ User Question (natural language) ollama serve & ollama pull codellama ``` -- **MCP PostgreSQL server** running on `http://localhost:3000` -- **PostgreSQL** with `pg_stat_statements` enabled +- **PostgreSQL** database accessible from the machine running pg-assistant ## Installation @@ -53,54 +53,39 @@ pip install -r requirements.txt ## Usage ```bash -# Basic usage (defaults: Ollama on :11434, MCP on :3000) 
-python app.py +# Start the Streamlit web UI +streamlit run app.py -# Custom endpoints -python app.py --ollama-url http://localhost:11434 --mcp-url http://localhost:3000 - -# Use a different model -python app.py --model mistral +# Or with a custom port +streamlit run app.py --server.port 8502 +``` -# Verbose/debug logging -python app.py -v +Then open the URL shown in your terminal (default: `http://localhost:8501`). -# Specify a PostgreSQL schema -python app.py --schema my_schema -``` +### Web UI Features -### CLI Commands +| Feature | Description | +|------------------------|----------------------------------------------------| +| **Ollama Settings** | Configure Ollama URL and model in the sidebar | +| **DB Connection** | Enter host, port, database, user, password, SSL | +| **Connection Profiles**| Save, load, and delete database connection profiles | +| **Query Tab** | Type natural language questions, view generated SQL | +| **Schema Tab** | Browse database tables and columns | +| **History Tab** | Review past queries and results | +| **CSV Export** | Download query results as CSV | -| Command | Description | -|------------|--------------------------------------| -| `help` | Show available commands and examples | -| `schema` | Refresh and display database schema | -| `clear` | Clear the terminal screen | -| `exit` | Quit the application | +### Connection Profiles -### Example Session +Profiles are saved to `~/.pg-assistant/profiles.json`. 
Each profile stores: +- Host, port, database name +- Username and password +- SSL mode -``` -pg-assistant> Show me the top 5 largest tables - -┌─────────────────────────────────────────────────┐ -│ Generated SQL │ -├─────────────────────────────────────────────────┤ -│ SELECT schemaname, relname, n_live_tup │ -│ FROM pg_stat_user_tables │ -│ ORDER BY n_live_tup DESC │ -│ LIMIT 5; │ -└─────────────────────────────────────────────────┘ - -┌─────────────┬──────────┬────────────┐ -│ schemaname │ relname │ n_live_tup │ -├─────────────┼──────────┼────────────┤ -│ public │ orders │ 1000000 │ -│ public │ users │ 500000 │ -│ ... │ ... │ ... │ -└─────────────┴──────────┴────────────┘ -5 row(s) returned in 42ms -``` +To use profiles: +1. Fill in connection details in the sidebar +2. Enter a profile name and click **Save Current Settings** +3. Next time, select the profile from the **Load Profile** dropdown +4. Click **Connect** to establish the connection ## SQL Safety @@ -112,6 +97,6 @@ The assistant enforces **read-only access** by: ## Schema Awareness -On startup, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy. +On connection, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy. -Refresh the schema at any time with the `schema` command. +Refresh the schema at any time via the **Schema** tab. diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index d1437f6..9bf4edc 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -1,326 +1,350 @@ #!/usr/bin/env python3 -"""AI-powered PostgreSQL assistant CLI application. +"""AI-powered PostgreSQL assistant — Streamlit web UI. 
Converts natural language questions into SQL queries using a local LLM (Ollama) -and executes them against a PostgreSQL database via an MCP server. +and executes them directly against a PostgreSQL database. """ -import argparse -import logging -import sys import time -from rich.console import Console -from rich.logging import RichHandler -from rich.panel import Panel -from rich.table import Table -from rich.text import Text +import pandas as pd +import streamlit as st +from db_client import DBClient from llm_client import LLMClient -from mcp_client import MCPClient +from profile_manager import ProfileManager from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError -console = Console() - -HELP_TEXT = """ -[bold cyan]Available Commands:[/bold cyan] - - [green]exit[/green] / [green]quit[/green] Quit the application - [green]help[/green] Show this help message - [green]schema[/green] Refresh and display the database schema - [green]clear[/green] Clear the terminal screen - -[bold cyan]Example Questions:[/bold cyan] - - • Show me all tables in the database - • What are the top 10 largest tables by row count? - • List all active connections to the database - • Show the slowest queries from pg_stat_statements - • What indexes exist on the users table? 
- • Show me the table structure for the orders table -""" - -BANNER = r""" -[bold cyan]╔══════════════════════════════════════════════════╗ -║ AI PostgreSQL Assistant (pg-assistant) ║ -║ Natural Language → SQL via Ollama + MCP Server ║ -╚══════════════════════════════════════════════════╝[/bold cyan] -""" - - -def setup_logging(verbose: bool = False) -> None: - """Configure logging with rich handler.""" - level = logging.DEBUG if verbose else logging.INFO - logging.basicConfig( - level=level, - format="%(message)s", - datefmt="[%X]", - handlers=[RichHandler(console=console, rich_tracebacks=True, show_path=False)], - ) - - -def parse_args() -> argparse.Namespace: - """Parse command-line arguments.""" - parser = argparse.ArgumentParser( - description="AI-powered PostgreSQL assistant using Ollama and MCP", - ) - parser.add_argument( - "--ollama-url", - default="http://localhost:11434", - help="Ollama server URL (default: http://localhost:11434)", - ) - parser.add_argument( - "--mcp-url", - default="http://localhost:3000", - help="MCP PostgreSQL server URL (default: http://localhost:3000)", - ) - parser.add_argument( - "--model", - default="codellama", - help="Ollama model name (default: codellama)", - ) - parser.add_argument( - "--schema", - default="public", - help="PostgreSQL schema to use for context (default: public)", - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="Enable verbose/debug logging", - ) - return parser.parse_args() - - -def check_services(llm_client: LLMClient, mcp_client: MCPClient) -> bool: - """Verify that Ollama and MCP services are reachable.""" - all_ok = True - - with console.status("[bold yellow]Checking Ollama server..."): - if llm_client.health_check(): - console.print(" [green]✓[/green] Ollama server is reachable") - models = llm_client.list_models() - if models: - model_names = [m.get("name", "unknown") for m in models] - console.print(f" Available models: {', '.join(model_names)}") +# 
--------------------------------------------------------------------------- +# Page config +# --------------------------------------------------------------------------- +st.set_page_config( + page_title="PG Assistant", + page_icon="🐘", + layout="wide", + initial_sidebar_state="expanded", +) + +# --------------------------------------------------------------------------- +# Session-state defaults +# --------------------------------------------------------------------------- +_defaults: dict = { + "db_client": None, + "llm_client": None, + "sql_generator": None, + "schema_metadata": None, + "query_history": [], +} +for _key, _val in _defaults.items(): + if _key not in st.session_state: + st.session_state[_key] = _val + +profile_mgr = ProfileManager() + +# --------------------------------------------------------------------------- +# Sidebar — connection & profile management +# --------------------------------------------------------------------------- +with st.sidebar: + st.title("🐘 PG Assistant") + st.caption("AI-powered PostgreSQL query tool") + st.divider() + + # --- Ollama settings --------------------------------------------------- + st.subheader("🤖 Ollama Settings") + ollama_url = st.text_input("Ollama URL", value="http://localhost:11434") + ollama_model = st.text_input("Model", value="codellama") + + if st.button("Test Ollama Connection"): + test_llm = LLMClient(base_url=ollama_url, model=ollama_model) + if test_llm.health_check(): + models = test_llm.list_models() + model_names = [m.get("name", "?") for m in (models or [])] + st.success(f"Connected! 
Models: {', '.join(model_names)}") else: - console.print( - f" [red]✗[/red] Cannot reach Ollama at {llm_client.base_url}" - ) - all_ok = False + st.error(f"Cannot reach Ollama at {ollama_url}") + + st.divider() + + # --- Database connection ------------------------------------------------ + st.subheader("🗄️ Database Connection") + + saved_profiles = profile_mgr.list_profiles() + profile_options = ["-- New Connection --"] + saved_profiles + selected_profile = st.selectbox("Load Profile", profile_options) + + profile_data: dict = {} + if selected_profile != "-- New Connection --": + profile_data = profile_mgr.get_profile(selected_profile) or {} + + col1, col2 = st.columns(2) + with col1: + db_host = st.text_input("Host", value=profile_data.get("host", "localhost")) + db_port = st.number_input( + "Port", + value=profile_data.get("port", 5432), + min_value=1, + max_value=65535, + step=1, + ) + db_name = st.text_input( + "Database", value=profile_data.get("database", "postgres") + ) + with col2: + db_user = st.text_input("User", value=profile_data.get("user", "postgres")) + db_password = st.text_input( + "Password", + value=profile_data.get("password", ""), + type="password", + ) + db_sslmode = st.selectbox( + "SSL Mode", + ["prefer", "disable", "require", "verify-ca", "verify-full"], + index=[ + "prefer", + "disable", + "require", + "verify-ca", + "verify-full", + ].index(profile_data.get("sslmode", "prefer")), + ) - with console.status("[bold yellow]Checking MCP server..."): - if mcp_client.health_check(): - console.print(" [green]✓[/green] MCP PostgreSQL server is reachable") + if st.button("🔌 Connect", use_container_width=True, type="primary"): + try: + db = DBClient( + host=db_host, + port=int(db_port), + database=db_name, + user=db_user, + password=db_password, + sslmode=db_sslmode, + ) + db.connect() + st.session_state.db_client = db + + llm = LLMClient(base_url=ollama_url, model=ollama_model) + st.session_state.llm_client = llm + gen = 
SQLGenerator(llm_client=llm) + st.session_state.sql_generator = gen + + schema = db.get_schema() + if schema: + gen.update_schema(schema) + st.session_state.schema_metadata = schema + + st.success(f"Connected to {db.get_connection_info()}") + except ConnectionError as exc: + st.error(str(exc)) + + if st.session_state.db_client and st.session_state.db_client.is_connected: + if st.button("Disconnect", use_container_width=True): + st.session_state.db_client.disconnect() + st.session_state.db_client = None + st.session_state.sql_generator = None + st.session_state.schema_metadata = None + st.rerun() + + st.divider() + + # --- Profile save / delete ---------------------------------------------- + st.subheader("💾 Save Profile") + profile_name = st.text_input("Profile Name", placeholder="e.g. production-db") + if st.button("Save Current Settings", use_container_width=True): + if not profile_name: + st.warning("Enter a profile name first.") else: - console.print( - f" [red]✗[/red] Cannot reach MCP server at {mcp_client.base_url}" + profile_mgr.save_profile( + name=profile_name, + host=db_host, + port=int(db_port), + database=db_name, + user=db_user, + password=db_password, + sslmode=db_sslmode, ) - all_ok = False - - return all_ok - - -def load_schema( - mcp_client: MCPClient, - sql_generator: SQLGenerator, - schema_name: str, -) -> None: - """Load and display database schema metadata.""" - with console.status("[bold yellow]Loading database schema..."): - schema = mcp_client.get_schema(schema_name) - - if schema: - sql_generator.update_schema(schema) - display_schema(schema) - else: - console.print( - "[yellow]⚠ Could not load schema metadata. 
" - "SQL generation will proceed without schema context.[/yellow]" + st.success(f"Profile '{profile_name}' saved!") + st.rerun() + + if saved_profiles: + st.divider() + st.subheader("🗑️ Delete Profile") + delete_target = st.selectbox( + "Select profile", saved_profiles, key="del_profile" ) - - -def display_schema(schema: dict) -> None: - """Render the database schema as a rich table.""" - table = Table( - title="Database Schema", - show_header=True, - header_style="bold magenta", + if st.button("Delete", use_container_width=True): + profile_mgr.delete_profile(delete_target) + st.success(f"Profile '{delete_target}' deleted.") + st.rerun() + +# --------------------------------------------------------------------------- +# Main area +# --------------------------------------------------------------------------- +st.header("🐘 AI PostgreSQL Assistant") + +if st.session_state.db_client and st.session_state.db_client.is_connected: + st.info( + f"Connected to **{st.session_state.db_client.get_connection_info()}** " + f"| Model: **{ollama_model}**" ) - table.add_column("Table", style="cyan", no_wrap=True) - table.add_column("Column", style="green") - table.add_column("Type", style="yellow") - table.add_column("Nullable", style="dim") - - for table_name, columns in schema.items(): - for i, col in enumerate(columns): - table.add_row( - table_name if i == 0 else "", - col["column_name"], - col["data_type"], - col["is_nullable"], - ) - table.add_section() - - console.print(table) - - -def display_results(result: dict) -> None: - """Render query results as a rich table.""" - if "error" in result: - console.print(f"\n[red]Query Error:[/red] {result['error']}") - return - - columns = result.get("columns", []) - rows = result.get("rows", []) - row_count = result.get("row_count", len(rows)) - elapsed_ms = result.get("elapsed_ms", 0) - - if not rows: - console.print("\n[yellow]Query returned no results.[/yellow]") - return - - table = Table( - title="Query Results", - 
show_header=True, - header_style="bold magenta", - show_lines=True, - ) - - # Determine column names - if columns: - col_names = columns - elif rows and isinstance(rows[0], dict): - col_names = list(rows[0].keys()) - else: - col_names = [f"col_{i}" for i in range(len(rows[0]) if rows else 0)] - - for col_name in col_names: - table.add_column(str(col_name), style="cyan", overflow="fold") - - for row in rows: - if isinstance(row, dict): - table.add_row(*[str(v) if v is not None else "NULL" for v in row.values()]) - elif isinstance(row, (list, tuple)): - table.add_row(*[str(v) if v is not None else "NULL" for v in row]) - - console.print(table) - console.print(f"\n[dim]{row_count} row(s) returned in {elapsed_ms}ms[/dim]") - - -def process_query( - user_input: str, - sql_generator: SQLGenerator, - mcp_client: MCPClient, -) -> None: - """Process a natural language query end-to-end.""" - # Step 1: Generate SQL - console.print() - with console.status("[bold yellow]Generating SQL..."): - start_gen = time.monotonic() - try: - sql = sql_generator.generate_sql(user_input) - except UnsafeSQLError as exc: - console.print(f"\n[red]Safety Block:[/red] {exc}") - return - except SQLGenerationError as exc: - console.print(f"\n[red]Generation Error:[/red] {exc}") - return - gen_elapsed = time.monotonic() - start_gen - - # Step 2: Display generated SQL - console.print( - Panel( - Text(sql, style="green"), - title="[bold]Generated SQL[/bold]", - subtitle=f"[dim]generated in {gen_elapsed:.2f}s[/dim]", - border_style="blue", - ) +else: + st.warning("Not connected to a database. 
Use the sidebar to connect.") + +# --------------------------------------------------------------------------- +# Tabs +# --------------------------------------------------------------------------- +tab_query, tab_schema, tab_history = st.tabs(["💬 Query", "📋 Schema", "📜 History"]) + +# ---- Query tab ------------------------------------------------------------ +with tab_query: + st.subheader("Ask a question in natural language") + + user_question = st.text_area( + "Your question", + placeholder="e.g. Show me the top 10 largest tables by row count", + height=100, + label_visibility="collapsed", ) - # Step 3: Execute SQL - with console.status("[bold yellow]Executing query..."): - start_exec = time.monotonic() - try: - result = mcp_client.execute_query(sql) - except (ConnectionError, RuntimeError) as exc: - console.print(f"\n[red]Execution Error:[/red] {exc}") - return - exec_elapsed = time.monotonic() - start_exec - - # Step 4: Display results - if "elapsed_ms" not in result: - result["elapsed_ms"] = round(exec_elapsed * 1000, 2) - - display_results(result) - - -def main() -> None: - """Main CLI entry point.""" - args = parse_args() - setup_logging(verbose=args.verbose) - - console.print(BANNER) - - # Initialize clients - llm_client = LLMClient( - base_url=args.ollama_url, - model=args.model, - ) - mcp_client = MCPClient(base_url=args.mcp_url) - sql_generator = SQLGenerator(llm_client=llm_client) - - # Check service connectivity - if not check_services(llm_client, mcp_client): - console.print( - "\n[bold red]Some services are not available. 
" - "Please ensure Ollama and MCP server are running.[/bold red]" + col_run, col_examples = st.columns([1, 3]) + with col_run: + run_btn = st.button( + "🚀 Run Query", + use_container_width=True, + type="primary", + disabled=not ( + st.session_state.db_client + and st.session_state.db_client.is_connected + and user_question.strip() + ), ) - console.print( - "[dim]Continuing anyway — errors will appear when you submit queries.[/dim]" - ) - - # Load schema - load_schema(mcp_client, sql_generator, args.schema) - - console.print( - '\n[dim]Type a natural language question, or "help" for commands.[/dim]\n' - ) - - # Main REPL loop - while True: - try: - user_input = console.input( - "[bold green]pg-assistant>[/bold green] " - ).strip() - except (KeyboardInterrupt, EOFError): - console.print("\n[dim]Goodbye![/dim]") - sys.exit(0) - - if not user_input: - continue - - command = user_input.lower() - - if command in ("exit", "quit"): - console.print("[dim]Goodbye![/dim]") - sys.exit(0) - - if command == "help": - console.print(HELP_TEXT) - continue - - if command == "schema": - load_schema(mcp_client, sql_generator, args.schema) - continue - - if command == "clear": - console.clear() - continue - - process_query(user_input, sql_generator, mcp_client) + with col_examples: + with st.expander("Example questions"): + st.markdown( + "- Show me all tables in the database\n" + "- What are the top 10 largest tables by row count?\n" + "- List all active connections to the database\n" + "- Show the slowest queries from pg_stat_statements\n" + "- What indexes exist on the users table?\n" + "- Show database size for each table" + ) + if run_btn and user_question.strip(): + generator = st.session_state.sql_generator + db = st.session_state.db_client -if __name__ == "__main__": - main() + if not generator or not db: + st.error("Connect to a database first.") + else: + with st.spinner("Generating SQL..."): + gen_start = time.monotonic() + try: + sql = 
generator.generate_sql(user_question.strip()) + gen_elapsed = time.monotonic() - gen_start + except UnsafeSQLError as exc: + st.error(f"**Safety Block:** {exc}") + sql = None + gen_elapsed = 0 + except SQLGenerationError as exc: + st.error(f"**Generation Error:** {exc}") + sql = None + gen_elapsed = 0 + + if sql: + st.subheader("Generated SQL") + st.code(sql, language="sql") + st.caption(f"Generated in {gen_elapsed:.2f}s") + + with st.spinner("Executing query..."): + result = db.execute_query(sql) + + if "error" in result: + st.error(f"**Query Error:** {result['error']}") + st.session_state.query_history.append( + { + "question": user_question.strip(), + "sql": sql, + "status": "error", + "error": result["error"], + "elapsed_ms": result.get("elapsed_ms", 0), + } + ) + else: + rows = result.get("rows", []) + row_count = result.get("row_count", 0) + elapsed_ms = result.get("elapsed_ms", 0) + + st.subheader("Results") + if rows: + df = pd.DataFrame(rows) + st.dataframe(df, use_container_width=True) + st.caption(f"{row_count} row(s) returned in {elapsed_ms}ms") + + csv = df.to_csv(index=False) + st.download_button( + "📥 Download CSV", + csv, + file_name="query_results.csv", + mime="text/csv", + ) + else: + st.info("Query returned no results.") + + st.session_state.query_history.append( + { + "question": user_question.strip(), + "sql": sql, + "status": "success", + "row_count": row_count, + "elapsed_ms": elapsed_ms, + } + ) + +# ---- Schema tab ----------------------------------------------------------- +with tab_schema: + st.subheader("Database Schema") + + if st.session_state.db_client and st.session_state.db_client.is_connected: + if st.button("🔄 Refresh Schema"): + schema = st.session_state.db_client.get_schema() + if schema: + if st.session_state.sql_generator: + st.session_state.sql_generator.update_schema(schema) + st.session_state.schema_metadata = schema + st.success("Schema refreshed!") + else: + st.warning("Could not load schema.") + + schema = 
st.session_state.schema_metadata + if schema: + st.caption(f"{len(schema)} table(s) found") + for table_name, columns in schema.items(): + with st.expander(f"📋 {table_name} ({len(columns)} columns)"): + col_df = pd.DataFrame(columns) + st.dataframe(col_df, use_container_width=True, hide_index=True) + else: + st.info("No schema loaded. Click 'Refresh Schema' to load.") + else: + st.warning("Connect to a database first.") + +# ---- History tab ---------------------------------------------------------- +with tab_history: + st.subheader("Query History") + + history = st.session_state.query_history + if history: + if st.button("🗑️ Clear History"): + st.session_state.query_history = [] + st.rerun() + + for _i, entry in enumerate(reversed(history), 1): + status_label = "[OK]" if entry["status"] == "success" else "[ERR]" + with st.expander(f"{status_label} {entry['question'][:80]}"): + st.code(entry["sql"], language="sql") + if entry["status"] == "success": + st.caption( + f"{entry.get('row_count', 0)} rows | " + f"{entry.get('elapsed_ms', 0)}ms" + ) + else: + st.error(entry.get("error", "Unknown error")) + else: + st.info("No queries yet. 
Ask a question in the Query tab!") diff --git a/tools/pg-assistant/db_client.py b/tools/pg-assistant/db_client.py new file mode 100644 index 0000000..8357f86 --- /dev/null +++ b/tools/pg-assistant/db_client.py @@ -0,0 +1,174 @@ +"""Direct PostgreSQL database client using psycopg2.""" + +import logging +import time +from typing import Any, Optional + +import psycopg2 +import psycopg2.extras + +logger = logging.getLogger(__name__) + + +class DBClient: + """Client for direct PostgreSQL database connections.""" + + def __init__( + self, + host: str, + port: int, + database: str, + user: str, + password: str, + sslmode: str = "prefer", + ) -> None: + self.conn_params = { + "host": host, + "port": port, + "dbname": database, + "user": user, + "password": password, + "sslmode": sslmode, + } + self._conn: Optional[psycopg2.extensions.connection] = None + + def connect(self) -> None: + """Establish a connection to PostgreSQL. + + Raises: + ConnectionError: If the database is unreachable. + """ + try: + self._conn = psycopg2.connect(**self.conn_params) + self._conn.autocommit = True + logger.info( + "Connected to PostgreSQL at %s:%s/%s", + self.conn_params["host"], + self.conn_params["port"], + self.conn_params["dbname"], + ) + except psycopg2.OperationalError as exc: + logger.error("Failed to connect to PostgreSQL: %s", exc) + raise ConnectionError(f"Cannot connect to PostgreSQL: {exc}") from exc + + def disconnect(self) -> None: + """Close the database connection.""" + if self._conn and not self._conn.closed: + self._conn.close() + logger.info("Disconnected from PostgreSQL") + + @property + def is_connected(self) -> bool: + """Check whether the connection is active.""" + if self._conn is None or self._conn.closed: + return False + try: + with self._conn.cursor() as cur: + cur.execute("SELECT 1") + return True + except psycopg2.Error: + return False + + def execute_query(self, sql: str) -> dict[str, Any]: + """Execute a SQL query and return results. 
+ + Args: + sql: The SQL query string to execute. + + Returns: + A dict with 'columns', 'rows', 'row_count', and 'elapsed_ms'. + + Raises: + ConnectionError: If not connected to the database. + RuntimeError: If the query fails. + """ + if not self.is_connected: + raise ConnectionError("Not connected to PostgreSQL. Please connect first.") + + start = time.monotonic() + try: + with self._conn.cursor( + cursor_factory=psycopg2.extras.RealDictCursor + ) as cur: + cur.execute(sql) + columns = ( + [desc[0] for desc in cur.description] if cur.description else [] + ) + rows = cur.fetchall() if cur.description else [] + elapsed = time.monotonic() - start + return { + "columns": columns, + "rows": [dict(row) for row in rows], + "row_count": len(rows), + "elapsed_ms": round(elapsed * 1000, 2), + } + except psycopg2.Error as exc: + elapsed = time.monotonic() - start + logger.error("Query execution failed: %s", exc) + return { + "error": str(exc).strip(), + "elapsed_ms": round(elapsed * 1000, 2), + } + + def get_schema( + self, schema_name: str = "public" + ) -> Optional[dict[str, list[dict[str, str]]]]: + """Retrieve database schema metadata. + + Args: + schema_name: The PostgreSQL schema to inspect. + + Returns: + Schema metadata dict mapping table names to column info lists, + or None on failure. 
+ """ + sql = """ + SELECT + t.table_name, + c.column_name, + c.data_type, + c.is_nullable, + c.column_default + FROM information_schema.tables t + JOIN information_schema.columns c + ON t.table_name = c.table_name + AND t.table_schema = c.table_schema + WHERE t.table_schema = %s + AND t.table_type = 'BASE TABLE' + ORDER BY t.table_name, c.ordinal_position; + """ + if not self.is_connected: + return None + + try: + with self._conn.cursor( + cursor_factory=psycopg2.extras.RealDictCursor + ) as cur: + cur.execute(sql, (schema_name,)) + rows = cur.fetchall() + except psycopg2.Error as exc: + logger.warning("Failed to fetch schema: %s", exc) + return None + + schema: dict[str, list[dict[str, str]]] = {} + for row in rows: + table = row["table_name"] + col_info = { + "column_name": row["column_name"], + "data_type": row["data_type"], + "is_nullable": row["is_nullable"], + "column_default": row["column_default"] or "", + } + if table not in schema: + schema[table] = [] + schema[table].append(col_info) + + return schema + + def get_connection_info(self) -> str: + """Return a display-friendly connection string (password masked).""" + return ( + f"{self.conn_params['user']}@" + f"{self.conn_params['host']}:{self.conn_params['port']}/" + f"{self.conn_params['dbname']}" + ) diff --git a/tools/pg-assistant/profile_manager.py b/tools/pg-assistant/profile_manager.py new file mode 100644 index 0000000..456d65b --- /dev/null +++ b/tools/pg-assistant/profile_manager.py @@ -0,0 +1,113 @@ +"""Database connection profile manager — save and load profiles as JSON.""" + +import json +import logging +import os +from pathlib import Path +from typing import Any, Optional + +logger = logging.getLogger(__name__) + +DEFAULT_PROFILES_DIR = os.path.join(str(Path.home()), ".pg-assistant") +PROFILES_FILE = "profiles.json" + + +class ProfileManager: + """Manages saved database connection profiles.""" + + def __init__(self, profiles_dir: str = DEFAULT_PROFILES_DIR) -> None: + self.profiles_dir = 
profiles_dir + self.profiles_path = os.path.join(profiles_dir, PROFILES_FILE) + self._ensure_dir() + + def _ensure_dir(self) -> None: + """Create the profiles directory if it doesn't exist.""" + os.makedirs(self.profiles_dir, exist_ok=True) + + def _load_all(self) -> dict[str, dict[str, Any]]: + """Load all profiles from disk.""" + if not os.path.exists(self.profiles_path): + return {} + try: + with open(self.profiles_path, "r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, dict): + return data + except (json.JSONDecodeError, OSError) as exc: + logger.warning("Failed to load profiles: %s", exc) + return {} + + def _save_all(self, profiles: dict[str, dict[str, Any]]) -> None: + """Save all profiles to disk.""" + try: + with open(self.profiles_path, "w", encoding="utf-8") as f: + json.dump(profiles, f, indent=2) + logger.info("Profiles saved to %s", self.profiles_path) + except OSError as exc: + logger.error("Failed to save profiles: %s", exc) + + def list_profiles(self) -> list[str]: + """Return a list of saved profile names.""" + return list(self._load_all().keys()) + + def get_profile(self, name: str) -> Optional[dict[str, Any]]: + """Retrieve a saved profile by name. + + Args: + name: The profile name. + + Returns: + A dict with connection parameters, or None if not found. + """ + profiles = self._load_all() + return profiles.get(name) + + def save_profile( + self, + name: str, + host: str, + port: int, + database: str, + user: str, + password: str, + sslmode: str = "prefer", + ) -> None: + """Save a database connection profile. + + Args: + name: A friendly name for the profile. + host: PostgreSQL host. + port: PostgreSQL port. + database: Database name. + user: Database user. + password: Database password. + sslmode: SSL mode (default: prefer). 
+ """ + profiles = self._load_all() + profiles[name] = { + "host": host, + "port": port, + "database": database, + "user": user, + "password": password, + "sslmode": sslmode, + } + self._save_all(profiles) + logger.info("Profile '%s' saved", name) + + def delete_profile(self, name: str) -> bool: + """Delete a saved profile. + + Args: + name: The profile name to delete. + + Returns: + True if deleted, False if not found. + """ + profiles = self._load_all() + if name in profiles: + del profiles[name] + self._save_all(profiles) + logger.info("Profile '%s' deleted", name) + return True + return False diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt index 8f33262..7ef45a6 100644 --- a/tools/pg-assistant/requirements.txt +++ b/tools/pg-assistant/requirements.txt @@ -1,2 +1,4 @@ requests>=2.31.0,<3.0.0 -rich>=13.7.0,<15.0.0 +psycopg2-binary>=2.9.0,<3.0.0 +streamlit>=1.28.0,<2.0.0 +pandas>=2.0.0,<3.0.0 From ac1a5e7e0415fdb7ee2a904017a3010f774a39d7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 4 Apr 2026 17:07:16 +0000 Subject: [PATCH 03/19] Remove unused mcp_client.py (replaced by db_client.py) --- tools/pg-assistant/mcp_client.py | 174 ------------------------------- 1 file changed, 174 deletions(-) delete mode 100644 tools/pg-assistant/mcp_client.py diff --git a/tools/pg-assistant/mcp_client.py b/tools/pg-assistant/mcp_client.py deleted file mode 100644 index e4bc230..0000000 --- a/tools/pg-assistant/mcp_client.py +++ /dev/null @@ -1,174 +0,0 @@ -"""MCP (Model Context Protocol) client for PostgreSQL server communication.""" - -import logging -import time -from typing import Any, Optional - -import requests - -logger = logging.getLogger(__name__) - -DEFAULT_MCP_URL = "http://localhost:3000" -DEFAULT_TIMEOUT = 30 - - -class MCPClient: - """Client for interacting with the MCP PostgreSQL server.""" - - def __init__( - self, - base_url: str = DEFAULT_MCP_URL, - timeout: 
int = DEFAULT_TIMEOUT, - ) -> None: - self.base_url = base_url.rstrip("/") - self.timeout = timeout - - def execute_query(self, sql: str) -> dict[str, Any]: - """Execute a SQL query via the MCP server. - - Args: - sql: The SQL query string to execute. - - Returns: - A dict with keys 'columns' and 'rows' on success, - or 'error' on failure. - - Raises: - ConnectionError: If the MCP server is unreachable. - RuntimeError: If the MCP server returns an error response. - """ - payload = { - "method": "query", - "params": {"sql": sql}, - } - - logger.debug("Sending query to MCP server: %s", sql[:200]) - start = time.monotonic() - - try: - response = requests.post( - self.base_url, - json=payload, - timeout=self.timeout, - ) - except requests.ConnectionError as exc: - logger.error("Cannot reach MCP server at %s", self.base_url) - raise ConnectionError( - f"Cannot connect to MCP server at {self.base_url}. " - "Ensure the MCP PostgreSQL server is running." - ) from exc - except requests.Timeout as exc: - logger.error("MCP request timed out after %ds", self.timeout) - raise RuntimeError( - f"MCP server request timed out after {self.timeout}s." 
- ) from exc - - elapsed = time.monotonic() - start - logger.debug("MCP server responded in %.2fs", elapsed) - - if response.status_code != 200: - error_detail = response.text[:500] - logger.error("MCP server error %d: %s", response.status_code, error_detail) - raise RuntimeError( - f"MCP server returned status {response.status_code}: {error_detail}" - ) - - data: dict[str, Any] = response.json() - - if "error" in data: - error_msg = data["error"] - logger.warning("MCP query error: %s", error_msg) - return {"error": str(error_msg)} - - return { - "columns": data.get("columns", []), - "rows": data.get("rows", []), - "row_count": data.get("rowCount", len(data.get("rows", []))), - "elapsed_ms": round(elapsed * 1000, 2), - } - - def get_schema(self, schema_name: str = "public") -> Optional[dict[str, Any]]: - """Retrieve database schema metadata via the MCP server. - - Args: - schema_name: The PostgreSQL schema to inspect. - - Returns: - Schema metadata dict, or None on failure. - """ - sql = f""" - SELECT - t.table_name, - c.column_name, - c.data_type, - c.is_nullable, - c.column_default - FROM information_schema.tables t - JOIN information_schema.columns c - ON t.table_name = c.table_name - AND t.table_schema = c.table_schema - WHERE t.table_schema = '{schema_name}' - AND t.table_type = 'BASE TABLE' - ORDER BY t.table_name, c.ordinal_position; - """ - try: - result = self.execute_query(sql) - if "error" in result: - logger.warning("Failed to fetch schema: %s", result["error"]) - return None - return self._parse_schema(result) - except (ConnectionError, RuntimeError) as exc: - logger.warning("Failed to fetch schema: %s", exc) - return None - - def health_check(self) -> bool: - """Check whether the MCP server is reachable. - - Returns: - True if the server responds, False otherwise. 
- """ - try: - response = requests.get(self.base_url, timeout=5) - return response.status_code < 500 - except (requests.ConnectionError, requests.Timeout): - return False - - @staticmethod - def _parse_schema(result: dict[str, Any]) -> dict[str, list[dict[str, str]]]: - """Parse raw schema query results into a structured dict. - - Args: - result: The raw query result from execute_query. - - Returns: - A dict mapping table names to lists of column info dicts. - """ - schema: dict[str, list[dict[str, str]]] = {} - columns = result.get("columns", []) - rows = result.get("rows", []) - - for row in rows: - if isinstance(row, dict): - table = row.get("table_name", "") - col_info = { - "column_name": row.get("column_name", ""), - "data_type": row.get("data_type", ""), - "is_nullable": row.get("is_nullable", ""), - "column_default": row.get("column_default", ""), - } - elif isinstance(row, (list, tuple)) and len(columns) >= 5: - table = str(row[0]) - col_info = { - "column_name": str(row[1]), - "data_type": str(row[2]), - "is_nullable": str(row[3]), - "column_default": str(row[4]) if row[4] else "", - } - else: - continue - - if table not in schema: - schema[table] = [] - schema[table].append(col_info) - - return schema From 4f7c457eb468e1a65e055159f061da299c6ffdd8 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 4 Apr 2026 17:29:14 +0000 Subject: [PATCH 04/19] Add multi-DB support (Oracle + PostgreSQL), auto-monitor, auto-analyse - Refactor db_client.py with abstract BaseDBClient, PostgreSQLClient, OracleClient - Add oracledb driver support (thin mode, no Oracle Client needed) - Add db_type dropdown in profile manager and connection sidebar - Add auto_monitor.py: periodic tablespace monitoring, auto-extend datafiles (max 20GB/file) - Add auto_analyse.py: AWR/pg_stat_statements analysis with LLM summary + action plan - Update sql_generator.py for dual-DB SQL dialects - Update Streamlit UI with Auto Monitor 
and Auto Analyse tabs - Update requirements.txt with oracledb dependency - Update README.md with new architecture and features --- tools/pg-assistant/README.md | 181 ++++++++----- tools/pg-assistant/app.py | 370 ++++++++++++++++++++++---- tools/pg-assistant/auto_analyse.py | 280 +++++++++++++++++++ tools/pg-assistant/auto_monitor.py | 350 ++++++++++++++++++++++++ tools/pg-assistant/db_client.py | 318 ++++++++++++++++++---- tools/pg-assistant/profile_manager.py | 30 +-- tools/pg-assistant/requirements.txt | 1 + tools/pg-assistant/sql_generator.py | 58 ++-- 8 files changed, 1380 insertions(+), 208 deletions(-) create mode 100644 tools/pg-assistant/auto_analyse.py create mode 100644 tools/pg-assistant/auto_monitor.py diff --git a/tools/pg-assistant/README.md b/tools/pg-assistant/README.md index 7d240c9..e8c3ca9 100644 --- a/tools/pg-assistant/README.md +++ b/tools/pg-assistant/README.md @@ -1,47 +1,62 @@ -# pg-assistant — AI-Powered PostgreSQL Assistant +# DB Assistant (pg-assistant) -A Streamlit web UI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them directly against PostgreSQL. Includes connection profile management for saving and loading database configurations. +AI-powered database assistant that converts natural language questions into SQL +queries using a local LLM (Ollama) and executes them against **PostgreSQL** or +**Oracle** databases via a Streamlit web UI. 
## Architecture ``` -User Question (natural language) - │ - ▼ -┌──────────────────┐ -│ sql_generator │ ← Prompt engineering + safety validation -│ │ -│ ┌────────────┐ │ -│ │ llm_client │──┼──→ Ollama API (codellama) -│ └────────────┘ │ -└────────┬─────────┘ - │ validated SELECT query - ▼ -┌──────────────────┐ -│ db_client │──→ PostgreSQL (direct via psycopg2) -└──────────────────┘ - │ - ▼ - Streamlit Web UI (tables, charts, CSV export) +┌──────────────────────────────────────────────────────┐ +│ Streamlit Web UI │ +│ (app.py) │ +│ ┌──────────┬──────────┬───────────┬───────────────┐ │ +│ │ Query │ Schema │ Auto │ Auto │ │ +│ │ Tab │ Tab │ Monitor │ Analyse │ │ +│ └──────────┴──────────┴───────────┴───────────────┘ │ +└──────────┬──────────────┬──────────────┬─────────────┘ + │ │ │ + ┌──────▼──────┐ ┌─────▼─────┐ ┌─────▼──────┐ + │ sql_generator│ │ auto_ │ │ auto_ │ + │ .py │ │ monitor.py│ │ analyse.py │ + └──────┬──────┘ └─────┬─────┘ └─────┬──────┘ + │ │ │ + ┌──────▼──────┐ │ ┌──────▼──────┐ + │ llm_client │ │ │ llm_client │ + │ .py (Ollama)│ │ │ .py (Ollama)│ + └─────────────┘ │ └─────────────┘ + │ + ┌───────────▼───────────┐ + │ db_client.py │ + │ ┌─────────┬────────┐ │ + │ │ Postgre │ Oracle │ │ + │ │ SQL │ Client │ │ + │ │ Client │ │ │ + │ └─────────┴────────┘ │ + └───────────────────────┘ + ┌───────────────────────┐ + │ profile_manager.py │ + │ (~/.pg-assistant/ │ + │ profiles.json) │ + └───────────────────────┘ ``` -| Module | Responsibility | -|---------------------|---------------------------------------------------| -| `app.py` | Streamlit web UI | -| `llm_client.py` | Ollama API communication | -| `db_client.py` | Direct PostgreSQL connection via psycopg2 | -| `sql_generator.py` | Prompt engineering, SQL extraction, safety checks | -| `profile_manager.py`| Save / load database connection profiles (JSON) | +| Module | Purpose | +|---------------------|------------------------------------------------------| +| `app.py` | Streamlit web UI — tabs for Query, 
Schema, Monitor, Analyse, History | +| `db_client.py` | Abstract DB client with PostgreSQL (psycopg2) and Oracle (oracledb) implementations | +| `llm_client.py` | Ollama REST API client (`/api/generate`) | +| `sql_generator.py` | Prompt engineering, SQL extraction, safety validation, retry logic | +| `profile_manager.py`| Save/load/delete connection profiles as JSON | +| `auto_monitor.py` | Periodic tablespace monitoring, auto-extend datafiles (Oracle) | +| `auto_analyse.py` | AWR/V$ (Oracle) and pg_stat_statements (PG) analysis with LLM summary | ## Prerequisites - **Python 3.10+** -- **Ollama** running locally with the `codellama` model pulled: - ```bash - ollama serve & - ollama pull codellama - ``` -- **PostgreSQL** database accessible from the machine running pg-assistant +- **Ollama** running locally with a model (e.g. `codellama`) +- **PostgreSQL** and/or **Oracle** database accessible from this machine +- For Oracle: `oracledb` uses thin mode (no Oracle Client installation needed) ## Installation @@ -50,53 +65,87 @@ cd tools/pg-assistant pip install -r requirements.txt ``` +### Dependencies + +| Package | Purpose | +|-------------------|----------------------------| +| `requests` | Ollama HTTP API calls | +| `psycopg2-binary` | PostgreSQL driver | +| `oracledb` | Oracle driver (thin mode) | +| `streamlit` | Web UI framework | +| `pandas` | DataFrame display & CSV | + +> You only need the driver for the database(s) you plan to connect to. +> If you only use PostgreSQL, `oracledb` is optional (and vice versa). + ## Usage ```bash -# Start the Streamlit web UI streamlit run app.py - -# Or with a custom port -streamlit run app.py --server.port 8502 ``` -Then open the URL shown in your terminal (default: `http://localhost:8501`). +Then open **http://localhost:8501** in your browser. 
### Web UI Features -| Feature | Description | -|------------------------|----------------------------------------------------| -| **Ollama Settings** | Configure Ollama URL and model in the sidebar | -| **DB Connection** | Enter host, port, database, user, password, SSL | -| **Connection Profiles**| Save, load, and delete database connection profiles | -| **Query Tab** | Type natural language questions, view generated SQL | -| **Schema Tab** | Browse database tables and columns | -| **History Tab** | Review past queries and results | -| **CSV Export** | Download query results as CSV | - -### Connection Profiles - -Profiles are saved to `~/.pg-assistant/profiles.json`. Each profile stores: -- Host, port, database name -- Username and password -- SSL mode - -To use profiles: -1. Fill in connection details in the sidebar -2. Enter a profile name and click **Save Current Settings** -3. Next time, select the profile from the **Load Profile** dropdown -4. Click **Connect** to establish the connection +1. **Sidebar** — Configure Ollama URL/model, select database type + (PostgreSQL or Oracle), enter connection details, save/load/delete profiles. + +2. **Query Tab** — Type a natural language question, see the generated SQL, + review results in an interactive table, download as CSV. + +3. **Schema Tab** — Browse database schema with expandable table views. + +4. **Auto Monitor Tab** — Configure threshold, interval, and max file size. + Start periodic monitoring or run a one-time check. + - **Oracle**: Monitors tablespace usage via `DBA_DATA_FILES` / `DBA_FREE_SPACE`. + Automatically enables autoextend or adds datafiles when usage exceeds threshold + (max 20 GB per file by default). + - **PostgreSQL**: Reports tablespace, database, and table sizes. + +5. **Auto Analyse Tab** — Collect performance data and generate an AI-powered + summary with action plan. + - **Oracle**: Queries `V$SQL`, `V$SYSTEM_EVENT`, `V$SYSSTAT`, `V$SGAINFO`, + `V$FILESTAT` for performance metrics. 
+ - **PostgreSQL**: Queries `pg_stat_statements`, `pg_stat_user_tables`, + `pg_stat_database`, `pg_stat_bgwriter`, `pg_stat_user_indexes`. + +6. **History Tab** — Review past queries with status, row counts, and timing. + +## Connection Profiles + +Profiles are stored in `~/.pg-assistant/profiles.json` and include: + +| Field | Description | +|----------------|---------------------------------| +| `db_type` | `postgresql` or `oracle` | +| `host` | Database hostname | +| `port` | Database port | +| `database` | Database name (PostgreSQL) | +| `service_name` | Service name (Oracle) | +| `user` | Database username | +| `password` | Database password (plaintext) | +| `sslmode` | SSL mode (PostgreSQL only) | + +> **Security note**: Passwords are stored in plaintext. Use file-system +> permissions to restrict access, or consider integrating with a secrets +> manager for production use. ## SQL Safety -The assistant enforces **read-only access** by: +The SQL generator blocks dangerous keywords before execution: + +`DROP`, `DELETE`, `TRUNCATE`, `UPDATE`, `INSERT`, `ALTER`, `CREATE`, +`GRANT`, `REVOKE`, `EXEC`, `EXECUTE` -1. Blocking dangerous keywords: `DROP`, `DELETE`, `TRUNCATE`, `UPDATE`, `INSERT`, `ALTER`, `CREATE`, `GRANT`, `REVOKE`, `EXEC`, `EXECUTE` -2. Requiring queries to start with `SELECT` or `WITH` (CTEs) -3. Stripping string literals before keyword scanning to avoid false positives +Only `SELECT` and `WITH` (CTE) queries are allowed through the natural +language query path. The auto-monitor uses a separate internal path for +administrative DDL (e.g. `ALTER TABLESPACE`). ## Schema Awareness -On connection, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy. +On connection, the tool fetches schema metadata and injects it into every +LLM prompt so the model generates accurate, table-aware SQL. 
-Refresh the schema at any time via the **Schema** tab. +- **PostgreSQL**: Queries `information_schema.tables` / `information_schema.columns` +- **Oracle**: Queries `ALL_TAB_COLUMNS` diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index 9bf4edc..d830787 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -"""AI-powered PostgreSQL assistant — Streamlit web UI. +"""AI-powered database assistant -- Streamlit web UI. Converts natural language questions into SQL queries using a local LLM (Ollama) -and executes them directly against a PostgreSQL database. +and executes them directly against PostgreSQL or Oracle databases. """ import time @@ -10,7 +10,14 @@ import pandas as pd import streamlit as st -from db_client import DBClient +from auto_analyse import PerformanceAnalyser +from auto_monitor import TablespaceMonitor +from db_client import ( + DB_TYPE_ORACLE, + DB_TYPE_POSTGRESQL, + BaseDBClient, + create_db_client, +) from llm_client import LLMClient from profile_manager import ProfileManager from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError @@ -19,8 +26,8 @@ # Page config # --------------------------------------------------------------------------- st.set_page_config( - page_title="PG Assistant", - page_icon="🐘", + page_title="DB Assistant", + page_icon="🛢️", layout="wide", initial_sidebar_state="expanded", ) @@ -34,6 +41,8 @@ "sql_generator": None, "schema_metadata": None, "query_history": [], + "monitor": None, + "analyser": None, } for _key, _val in _defaults.items(): if _key not in st.session_state: @@ -42,11 +51,23 @@ profile_mgr = ProfileManager() # --------------------------------------------------------------------------- -# Sidebar — connection & profile management +# Helper: current db_type from connected client +# --------------------------------------------------------------------------- + + +def _connected_db_type() -> str: + client: BaseDBClient | None = 
st.session_state.db_client + if client and client.is_connected: + return client.db_type + return "" + + +# --------------------------------------------------------------------------- +# Sidebar -- connection & profile management # --------------------------------------------------------------------------- with st.sidebar: - st.title("🐘 PG Assistant") - st.caption("AI-powered PostgreSQL query tool") + st.title("🛢️ DB Assistant") + st.caption("AI-powered PostgreSQL & Oracle query tool") st.divider() # --- Ollama settings --------------------------------------------------- @@ -68,6 +89,10 @@ # --- Database connection ------------------------------------------------ st.subheader("🗄️ Database Connection") + db_type_options = ["PostgreSQL", "Oracle"] + db_type_map = {"PostgreSQL": DB_TYPE_POSTGRESQL, "Oracle": DB_TYPE_ORACLE} + reverse_map = {v: k for k, v in db_type_map.items()} + saved_profiles = profile_mgr.list_profiles() profile_options = ["-- New Connection --"] + saved_profiles selected_profile = st.selectbox("Load Profile", profile_options) @@ -76,54 +101,81 @@ if selected_profile != "-- New Connection --": profile_data = profile_mgr.get_profile(selected_profile) or {} + profile_db_type = profile_data.get("db_type", DB_TYPE_POSTGRESQL) + default_type_idx = db_type_options.index( + reverse_map.get(profile_db_type, "PostgreSQL") + ) + selected_db_label = st.selectbox( + "Database Type", db_type_options, index=default_type_idx + ) + selected_db_type = db_type_map[selected_db_label] + col1, col2 = st.columns(2) with col1: db_host = st.text_input("Host", value=profile_data.get("host", "localhost")) db_port = st.number_input( "Port", - value=profile_data.get("port", 5432), + value=profile_data.get( + "port", 5432 if selected_db_type == DB_TYPE_POSTGRESQL else 1521 + ), min_value=1, max_value=65535, step=1, ) - db_name = st.text_input( - "Database", value=profile_data.get("database", "postgres") - ) + if selected_db_type == DB_TYPE_POSTGRESQL: + db_name = st.text_input( 
+ "Database", value=profile_data.get("database", "postgres") + ) + else: + db_service = st.text_input( + "Service Name", value=profile_data.get("service_name", "ORCL") + ) with col2: - db_user = st.text_input("User", value=profile_data.get("user", "postgres")) + db_user = st.text_input( + "User", + value=profile_data.get( + "user", "postgres" if selected_db_type == DB_TYPE_POSTGRESQL else "" + ), + ) db_password = st.text_input( "Password", value=profile_data.get("password", ""), type="password", ) - db_sslmode = st.selectbox( - "SSL Mode", - ["prefer", "disable", "require", "verify-ca", "verify-full"], - index=[ - "prefer", - "disable", - "require", - "verify-ca", - "verify-full", - ].index(profile_data.get("sslmode", "prefer")), - ) + if selected_db_type == DB_TYPE_POSTGRESQL: + db_sslmode = st.selectbox( + "SSL Mode", + ["prefer", "disable", "require", "verify-ca", "verify-full"], + index=[ + "prefer", + "disable", + "require", + "verify-ca", + "verify-full", + ].index(profile_data.get("sslmode", "prefer")), + ) if st.button("🔌 Connect", use_container_width=True, type="primary"): try: - db = DBClient( - host=db_host, - port=int(db_port), - database=db_name, - user=db_user, - password=db_password, - sslmode=db_sslmode, - ) + conn_kwargs: dict = { + "host": db_host, + "port": int(db_port), + "user": db_user, + "password": db_password, + } + if selected_db_type == DB_TYPE_POSTGRESQL: + conn_kwargs["database"] = db_name + conn_kwargs["sslmode"] = db_sslmode + else: + conn_kwargs["service_name"] = db_service + + db = create_db_client(selected_db_type, **conn_kwargs) db.connect() st.session_state.db_client = db llm = LLMClient(base_url=ollama_url, model=ollama_model) st.session_state.llm_client = llm - gen = SQLGenerator(llm_client=llm) + gen = SQLGenerator(llm_client=llm, db_type=selected_db_type) st.session_state.sql_generator = gen schema = db.get_schema() @@ -131,16 +183,23 @@ gen.update_schema(schema) st.session_state.schema_metadata = schema + 
st.session_state.monitor = None + st.session_state.analyser = None + st.success(f"Connected to {db.get_connection_info()}") - except ConnectionError as exc: + except (ConnectionError, ImportError) as exc: st.error(str(exc)) if st.session_state.db_client and st.session_state.db_client.is_connected: if st.button("Disconnect", use_container_width=True): + if st.session_state.monitor: + st.session_state.monitor.stop() st.session_state.db_client.disconnect() st.session_state.db_client = None st.session_state.sql_generator = None st.session_state.schema_metadata = None + st.session_state.monitor = None + st.session_state.analyser = None st.rerun() st.divider() @@ -152,15 +211,20 @@ if not profile_name: st.warning("Enter a profile name first.") else: - profile_mgr.save_profile( - name=profile_name, - host=db_host, - port=int(db_port), - database=db_name, - user=db_user, - password=db_password, - sslmode=db_sslmode, - ) + save_kwargs: dict = { + "name": profile_name, + "db_type": selected_db_type, + "host": db_host, + "port": int(db_port), + "user": db_user, + "password": db_password, + } + if selected_db_type == DB_TYPE_POSTGRESQL: + save_kwargs["database"] = db_name + save_kwargs["sslmode"] = db_sslmode + else: + save_kwargs["service_name"] = db_service + profile_mgr.save_profile(**save_kwargs) st.success(f"Profile '{profile_name}' saved!") st.rerun() @@ -178,12 +242,13 @@ # --------------------------------------------------------------------------- # Main area # --------------------------------------------------------------------------- -st.header("🐘 AI PostgreSQL Assistant") +st.header("🛢️ AI Database Assistant") if st.session_state.db_client and st.session_state.db_client.is_connected: + db_label = _connected_db_type().upper() st.info( f"Connected to **{st.session_state.db_client.get_connection_info()}** " - f"| Model: **{ollama_model}**" + f"({db_label}) | Model: **{ollama_model}**" ) else: st.warning("Not connected to a database. 
Use the sidebar to connect.") @@ -191,7 +256,9 @@ # --------------------------------------------------------------------------- # Tabs # --------------------------------------------------------------------------- -tab_query, tab_schema, tab_history = st.tabs(["💬 Query", "📋 Schema", "📜 History"]) +tab_query, tab_schema, tab_monitor, tab_analyse, tab_history = st.tabs( + ["💬 Query", "📋 Schema", "📡 Auto Monitor", "📊 Auto Analyse", "📜 History"] +) # ---- Query tab ------------------------------------------------------------ with tab_query: @@ -325,6 +392,217 @@ else: st.warning("Connect to a database first.") +# ---- Auto Monitor tab ----------------------------------------------------- +with tab_monitor: + st.subheader("📡 Tablespace Auto Monitor") + + if not (st.session_state.db_client and st.session_state.db_client.is_connected): + st.warning("Connect to a database first.") + else: + db_client = st.session_state.db_client + + st.markdown( + "Periodically monitors tablespace usage and automatically extends " + "datafiles when usage exceeds the threshold (Oracle). " + "For PostgreSQL, reports storage metrics." 
+ ) + + mcol1, mcol2, mcol3 = st.columns(3) + with mcol1: + mon_threshold = st.slider( + "Usage threshold (%)", 50, 99, 85, key="mon_threshold" + ) + with mcol2: + mon_interval = st.selectbox( + "Check interval", + [60, 300, 900, 1800, 3600], + index=4, + format_func=lambda x: ( + f"{x // 60} min" if x < 3600 else f"{x // 3600} hr" + ), + key="mon_interval", + ) + with mcol3: + mon_max_gb = st.number_input( + "Max file size (GB)", 1, 100, 20, key="mon_max_gb" + ) + + bcol1, bcol2, bcol3 = st.columns(3) + with bcol1: + if st.button( + "▶️ Start Auto Monitor", use_container_width=True, type="primary" + ): + monitor = TablespaceMonitor( + db_client=db_client, + threshold_pct=mon_threshold, + max_file_size_gb=mon_max_gb, + interval_sec=mon_interval, + ) + monitor.start() + st.session_state.monitor = monitor + st.success("Monitor started!") + with bcol2: + if st.button("⏹️ Stop Monitor", use_container_width=True): + if st.session_state.monitor: + st.session_state.monitor.stop() + st.info("Monitor stopped.") + with bcol3: + if st.button("🔍 Check Now", use_container_width=True): + monitor = st.session_state.monitor + if not monitor: + monitor = TablespaceMonitor( + db_client=db_client, + threshold_pct=mon_threshold, + max_file_size_gb=mon_max_gb, + ) + st.session_state.monitor = monitor + with st.spinner("Checking tablespace usage..."): + event = monitor.run_check() + st.success("Check complete!") + + if st.session_state.monitor and st.session_state.monitor.running: + st.info( + f"Monitor is running (interval: {st.session_state.monitor.interval_sec}s, " + f"threshold: {st.session_state.monitor.threshold_pct}%)" + ) + + # Display monitor events + monitor = st.session_state.monitor + if monitor and monitor.events: + st.divider() + st.subheader("Monitor Events") + for i, evt in enumerate(reversed(monitor.events[-20:])): + status_icon = {"ok": "🟢", "warning": "🟡", "error": "🔴"}.get( + evt["status"], "⚪" + ) + with st.expander( + f"{status_icon} {evt['timestamp']} - 
{evt['status'].upper()}" + ): + if evt.get("error"): + st.error(evt["error"]) + + ts_data = evt.get("tablespace_data", []) + display_rows = [ + r + for r in ts_data + if isinstance(r, dict) and "_section" not in r + ] + if display_rows: + st.caption("Tablespace Usage") + st.dataframe( + pd.DataFrame(display_rows), + use_container_width=True, + hide_index=True, + ) + + for section_item in ts_data: + if ( + isinstance(section_item, dict) + and "_section" in section_item + ): + st.caption(section_item["_section"].title()) + sec_rows = section_item.get("rows", []) + if sec_rows: + st.dataframe( + pd.DataFrame(sec_rows), + use_container_width=True, + hide_index=True, + ) + + actions = evt.get("actions", []) + if actions: + st.caption("Actions Taken") + for act in actions: + act_icon = ( + "✅" + if "added" in act.get("action", "") + or "enabled" in act.get("action", "") + else "❌" + ) + st.markdown(f"{act_icon} **{act.get('action', '')}**") + if act.get("sql"): + st.code(act["sql"], language="sql") + if act.get("error"): + st.error(act["error"]) + +# ---- Auto Analyse tab ----------------------------------------------------- +with tab_analyse: + st.subheader("📊 Performance Analysis") + + if not (st.session_state.db_client and st.session_state.db_client.is_connected): + st.warning("Connect to a database first.") + elif not st.session_state.llm_client: + st.warning("Configure Ollama settings and connect first.") + else: + db_client = st.session_state.db_client + llm_client = st.session_state.llm_client + db_label = db_client.db_type.upper() + + st.markdown( + f"Collects performance data from **{db_label}** " + f"({'AWR / V$ views' if db_client.db_type == DB_TYPE_ORACLE else 'pg_stat_statements / pg_stat_*'}) " + "and generates an AI-powered summary with action plan." 
+ ) + + acol1, acol2 = st.columns(2) + with acol1: + if st.button("📈 Collect Data Only", use_container_width=True): + analyser = PerformanceAnalyser( + db_client=db_client, llm_client=llm_client + ) + with st.spinner("Collecting performance data..."): + raw_data = analyser.collect_data() + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = { + "raw_data": raw_data, + "analysis": None, + } + st.success("Data collected!") + + with acol2: + if st.button( + "🧠 Full Analysis (Data + LLM)", + use_container_width=True, + type="primary", + ): + analyser = PerformanceAnalyser( + db_client=db_client, llm_client=llm_client + ) + with st.spinner("Collecting data and running LLM analysis..."): + result = analyser.analyse() + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = result + st.success("Analysis complete!") + + # Display analysis results + last = st.session_state.get("_last_analysis") + if last: + st.divider() + + if last.get("analysis"): + st.subheader("AI Analysis & Action Plan") + st.markdown(last["analysis"]) + + raw = last.get("raw_data", {}) + if raw: + st.divider() + st.subheader("Raw Performance Data") + for section_name, section_data in raw.items(): + if section_name == "db_type": + continue + label = section_name.replace("_", " ").title() + with st.expander(f"📊 {label}"): + if isinstance(section_data, dict) and "error" in section_data: + st.error(section_data["error"]) + elif isinstance(section_data, list) and section_data: + st.dataframe( + pd.DataFrame(section_data), + use_container_width=True, + hide_index=True, + ) + else: + st.info("No data available.") + # ---- History tab ---------------------------------------------------------- with tab_history: st.subheader("Query History") diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py new file mode 100644 index 0000000..737a3a7 --- /dev/null +++ b/tools/pg-assistant/auto_analyse.py @@ -0,0 +1,280 @@ +"""Performance analysis for 
Oracle (AWR/V$) and PostgreSQL (pg_stat_statements).""" + +import logging +from typing import Any + +from db_client import BaseDBClient, DB_TYPE_ORACLE, DB_TYPE_POSTGRESQL +from llm_client import LLMClient + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Oracle V$ performance queries +# --------------------------------------------------------------------------- +_ORA_TOP_SQL = """ + SELECT * FROM ( + SELECT + sql_id, + plan_hash_value, + ROUND(elapsed_time / 1e6, 2) AS elapsed_sec, + executions, + buffer_gets, + disk_reads, + SUBSTR(sql_text, 1, 200) AS sql_text + FROM v$sql + ORDER BY elapsed_time DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_WAIT_EVENTS = """ + SELECT * FROM ( + SELECT + event, + total_waits, + ROUND(time_waited / 100, 2) AS time_waited_sec, + ROUND(average_wait / 100, 4) AS avg_wait_sec + FROM v$system_event + WHERE wait_class != 'Idle' + ORDER BY time_waited DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_SYS_STATS = """ + SELECT name, value + FROM v$sysstat + WHERE name IN ( + 'db block gets', 'consistent gets', 'physical reads', + 'redo size', 'sorts (memory)', 'sorts (disk)', + 'rows processed', 'parse count (total)', 'parse count (hard)', + 'execute count', 'user commits', 'user rollbacks' + ) + ORDER BY name +""" + +_ORA_SGA = """ + SELECT name, ROUND(bytes / 1048576, 2) AS size_mb + FROM v$sgainfo + WHERE name IN ( + 'Fixed SGA Size', 'Redo Buffers', 'Buffer Cache Size', + 'Shared Pool Size', 'Large Pool Size', 'Java Pool Size', + 'Streams Pool Size', 'Maximum SGA Size' + ) + ORDER BY name +""" + +_ORA_TABLESPACE_IO = """ + SELECT * FROM ( + SELECT + ts.name AS tablespace_name, + SUM(fs.phyrds) AS physical_reads, + SUM(fs.phywrts) AS physical_writes, + ROUND(SUM(fs.readtim) / 100, 2) AS read_time_sec, + ROUND(SUM(fs.writetim) / 100, 2) AS write_time_sec + FROM v$filestat fs + JOIN v$datafile df ON fs.file# = df.file# + JOIN v$tablespace ts ON df.ts# = ts.ts# + GROUP BY ts.name 
+ ORDER BY physical_reads + physical_writes DESC + ) WHERE ROWNUM <= 20 +""" + +# --------------------------------------------------------------------------- +# PostgreSQL performance queries +# --------------------------------------------------------------------------- +_PG_TOP_QUERIES = """ + SELECT + queryid, + LEFT(query, 200) AS query_text, + calls, + ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, + ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec, + rows, + shared_blks_hit, + shared_blks_read, + CASE WHEN shared_blks_hit + shared_blks_read > 0 + THEN ROUND( + shared_blks_hit::numeric + / (shared_blks_hit + shared_blks_read) * 100, 2 + ) + ELSE 100 + END AS cache_hit_pct + FROM pg_stat_statements + ORDER BY total_exec_time DESC + LIMIT 20 +""" + +_PG_TABLE_STATS = """ + SELECT + schemaname, relname, + seq_scan, seq_tup_read, + idx_scan, idx_tup_fetch, + n_tup_ins, n_tup_upd, n_tup_del, + n_live_tup, n_dead_tup, + last_vacuum, last_autovacuum, + last_analyze, last_autoanalyze + FROM pg_stat_user_tables + ORDER BY seq_scan + COALESCE(idx_scan, 0) DESC + LIMIT 20 +""" + +_PG_DB_STATS = """ + SELECT + datname, + numbackends, + xact_commit, xact_rollback, + blks_read, blks_hit, + CASE WHEN blks_hit + blks_read > 0 + THEN ROUND(blks_hit::numeric / (blks_hit + blks_read) * 100, 2) + ELSE 100 + END AS cache_hit_pct, + tup_returned, tup_fetched, + tup_inserted, tup_updated, tup_deleted, + temp_files, temp_bytes + FROM pg_stat_database + WHERE datname = current_database() +""" + +_PG_BGWRITER = """ + SELECT + checkpoints_timed, checkpoints_req, + buffers_checkpoint, buffers_clean, buffers_backend, + maxwritten_clean + FROM pg_stat_bgwriter +""" + +_PG_UNUSED_INDEXES = """ + SELECT + schemaname, relname, indexrelname, + idx_scan, idx_tup_read, idx_tup_fetch, + pg_relation_size(indexrelid) / 1048576 AS index_size_mb + FROM pg_stat_user_indexes + WHERE idx_scan = 0 + ORDER BY pg_relation_size(indexrelid) DESC + LIMIT 20 +""" + 
+ANALYSIS_SYSTEM_PROMPT = ( + "You are a senior database performance engineer. " + "Analyze the following database performance data and provide:\n" + "1. **Executive Summary** (2-3 sentences)\n" + "2. **Key Findings** (bullet list of important observations)\n" + "3. **Top Issues** (ranked by severity)\n" + "4. **Action Plan** (prioritized recommendations with specific SQL or steps)\n\n" + "Be concise and actionable. Use markdown formatting." +) + + +# --------------------------------------------------------------------------- +# Analyser +# --------------------------------------------------------------------------- +class PerformanceAnalyser: + """Collects DB performance data and generates LLM-powered analysis.""" + + def __init__( + self, + db_client: BaseDBClient, + llm_client: LLMClient, + ) -> None: + self.db_client = db_client + self.llm_client = llm_client + + def collect_data(self) -> dict[str, Any]: + """Collect raw performance data from the database.""" + if self.db_client.db_type == DB_TYPE_ORACLE: + return self._collect_oracle() + return self._collect_postgresql() + + def analyse(self) -> dict[str, Any]: + """Collect data, generate LLM analysis, and return everything.""" + raw_data = self.collect_data() + report_text = self._format_report(raw_data) + + try: + llm_response = self.llm_client.generate( + prompt=report_text, + system_prompt=ANALYSIS_SYSTEM_PROMPT, + ) + except (ConnectionError, RuntimeError) as exc: + llm_response = f"LLM analysis failed: {exc}" + + return { + "raw_data": raw_data, + "report_text": report_text, + "analysis": llm_response, + } + + # -- Oracle collection --------------------------------------------------- + + def _collect_oracle(self) -> dict[str, Any]: + sections: dict[str, Any] = {} + queries = { + "top_sql": _ORA_TOP_SQL, + "wait_events": _ORA_WAIT_EVENTS, + "system_stats": _ORA_SYS_STATS, + "sga_info": _ORA_SGA, + "tablespace_io": _ORA_TABLESPACE_IO, + } + for name, sql in queries.items(): + result = 
self.db_client.execute_query(sql) + if "error" in result: + sections[name] = {"error": result["error"]} + else: + sections[name] = result.get("rows", []) + sections["db_type"] = DB_TYPE_ORACLE + return sections + + # -- PostgreSQL collection ----------------------------------------------- + + def _collect_postgresql(self) -> dict[str, Any]: + sections: dict[str, Any] = {} + queries = { + "top_queries": _PG_TOP_QUERIES, + "table_stats": _PG_TABLE_STATS, + "database_stats": _PG_DB_STATS, + "bgwriter_stats": _PG_BGWRITER, + "unused_indexes": _PG_UNUSED_INDEXES, + } + for name, sql in queries.items(): + result = self.db_client.execute_query(sql) + if "error" in result: + sections[name] = {"error": result["error"]} + else: + sections[name] = result.get("rows", []) + sections["db_type"] = DB_TYPE_POSTGRESQL + return sections + + # -- Report formatting --------------------------------------------------- + + def _format_report(self, data: dict[str, Any]) -> str: + """Format collected data into a human-readable report for the LLM.""" + db_type = data.get("db_type", "unknown") + parts = [f"DATABASE PERFORMANCE REPORT ({db_type.upper()})\n{'=' * 60}\n"] + + for section_name, section_data in data.items(): + if section_name == "db_type": + continue + parts.append(f"\n--- {section_name.upper().replace('_', ' ')} ---") + if isinstance(section_data, dict) and "error" in section_data: + parts.append(f" ERROR: {section_data['error']}") + elif isinstance(section_data, list): + if not section_data: + parts.append(" (no data)") + else: + for i, row in enumerate(section_data[:15]): + parts.append(f" [{i + 1}] {_format_row(row)}") + if len(section_data) > 15: + parts.append(f" ... 
and {len(section_data) - 15} more rows") + else: + parts.append(f" {section_data}") + + return "\n".join(parts) + + +def _format_row(row: dict[str, Any]) -> str: + """Format a single row dict into a compact string.""" + items = [] + for k, v in row.items(): + if v is None: + continue + items.append(f"{k}={v}") + return ", ".join(items) diff --git a/tools/pg-assistant/auto_monitor.py b/tools/pg-assistant/auto_monitor.py new file mode 100644 index 0000000..938ca67 --- /dev/null +++ b/tools/pg-assistant/auto_monitor.py @@ -0,0 +1,350 @@ +"""Tablespace monitoring with auto-extend support for Oracle and PostgreSQL.""" + +import logging +import os +import threading +from datetime import datetime, timezone +from typing import Any, Optional + +from db_client import BaseDBClient, DB_TYPE_ORACLE + +logger = logging.getLogger(__name__) + +DEFAULT_THRESHOLD_PCT = 85 +DEFAULT_MAX_FILE_SIZE_GB = 20 +DEFAULT_INTERVAL_SEC = 3600 # 1 hour + + +# --------------------------------------------------------------------------- +# Oracle tablespace queries +# --------------------------------------------------------------------------- +_ORACLE_TABLESPACE_USAGE_SQL = """ + SELECT + df.tablespace_name, + COUNT(df.file_id) AS file_count, + ROUND(SUM(df.bytes) / 1048576, 2) AS total_size_mb, + ROUND(NVL(SUM(fs.free_bytes), 0) / 1048576, 2) AS free_mb, + ROUND((SUM(df.bytes) - NVL(SUM(fs.free_bytes), 0)) / 1048576, 2) AS used_mb, + ROUND( + (SUM(df.bytes) - NVL(SUM(fs.free_bytes), 0)) / SUM(df.bytes) * 100, 2 + ) AS used_pct + FROM dba_data_files df + LEFT JOIN ( + SELECT file_id, SUM(bytes) AS free_bytes + FROM dba_free_space + GROUP BY file_id + ) fs ON df.file_id = fs.file_id + GROUP BY df.tablespace_name + ORDER BY used_pct DESC +""" + +_ORACLE_DATAFILES_SQL = """ + SELECT + file_id, + file_name, + tablespace_name, + ROUND(bytes / 1048576, 2) AS size_mb, + ROUND(maxbytes / 1048576, 2) AS max_size_mb, + autoextensible + FROM dba_data_files + WHERE tablespace_name = :ts_name + ORDER BY file_id 
+""" + +# --------------------------------------------------------------------------- +# PostgreSQL storage queries +# --------------------------------------------------------------------------- +_PG_DATABASE_SIZE_SQL = """ + SELECT + datname AS database_name, + pg_database_size(datname) / 1048576 AS size_mb + FROM pg_database + WHERE datname NOT IN ('template0', 'template1') + ORDER BY size_mb DESC +""" + +_PG_TABLE_SIZE_SQL = """ + SELECT + schemaname, + tablename, + pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) / 1048576 AS total_size_mb, + pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) / 1048576 AS table_size_mb + FROM pg_tables + WHERE schemaname NOT IN ('pg_catalog', 'information_schema') + ORDER BY total_size_mb DESC + LIMIT 50 +""" + +_PG_TABLESPACE_SQL = """ + SELECT + spcname AS tablespace_name, + pg_tablespace_location(oid) AS location, + pg_tablespace_size(oid) / 1048576 AS size_mb + FROM pg_tablespace + ORDER BY size_mb DESC +""" + + +# --------------------------------------------------------------------------- +# Monitor event dataclass-like dict builder +# --------------------------------------------------------------------------- +def _event( + status: str, + tablespace_data: list[dict[str, Any]], + actions: list[dict[str, Any]], + error: str = "", +) -> dict[str, Any]: + return { + "timestamp": datetime.now(timezone.utc).isoformat(), + "status": status, + "tablespace_data": tablespace_data, + "actions": actions, + "error": error, + } + + +# --------------------------------------------------------------------------- +# Core monitor logic +# --------------------------------------------------------------------------- +class TablespaceMonitor: + """Monitors tablespace usage and auto-extends datafiles when needed.""" + + def __init__( + self, + db_client: BaseDBClient, + threshold_pct: float = DEFAULT_THRESHOLD_PCT, + max_file_size_gb: float = DEFAULT_MAX_FILE_SIZE_GB, + interval_sec: int = 
DEFAULT_INTERVAL_SEC, + ) -> None: + self.db_client = db_client + self.threshold_pct = threshold_pct + self.max_file_size_gb = max_file_size_gb + self.max_file_size_mb = max_file_size_gb * 1024 + self.interval_sec = interval_sec + self.events: list[dict[str, Any]] = [] + self._thread: Optional[threading.Thread] = None + self._stop = threading.Event() + self.running = False + + # -- public API ---------------------------------------------------------- + + def start(self) -> None: + """Start periodic monitoring in a background thread.""" + if self._thread and self._thread.is_alive(): + return + self._stop.clear() + self._thread = threading.Thread(target=self._loop, daemon=True) + self._thread.start() + self.running = True + logger.info( + "Tablespace monitor started (interval=%ds, threshold=%s%%)", + self.interval_sec, + self.threshold_pct, + ) + + def stop(self) -> None: + """Stop the background monitoring thread.""" + self._stop.set() + self.running = False + logger.info("Tablespace monitor stopped") + + def run_check(self) -> dict[str, Any]: + """Run a single monitoring check and return the event dict.""" + try: + if self.db_client.db_type == DB_TYPE_ORACLE: + return self._check_oracle() + return self._check_postgresql() + except Exception as exc: + event = _event("error", [], [], error=str(exc)) + self.events.append(event) + return event + + # -- background loop ----------------------------------------------------- + + def _loop(self) -> None: + while not self._stop.is_set(): + try: + self.run_check() + except Exception as exc: + logger.error("Monitor check failed: %s", exc) + self._stop.wait(self.interval_sec) + self.running = False + + # -- Oracle checks ------------------------------------------------------- + + def _check_oracle(self) -> dict[str, Any]: + result = self.db_client.execute_query(_ORACLE_TABLESPACE_USAGE_SQL) + if "error" in result: + event = _event("error", [], [], error=result["error"]) + self.events.append(event) + return event + + ts_data = 
result.get("rows", []) + actions: list[dict[str, Any]] = [] + + for ts in ts_data: + ts_name = ts.get("TABLESPACE_NAME") or ts.get("tablespace_name", "") + used_pct = float(ts.get("USED_PCT") or ts.get("used_pct", 0)) + + if used_pct >= self.threshold_pct: + ts_actions = self._auto_extend_oracle(ts_name) + actions.extend(ts_actions) + + status = "warning" if actions else "ok" + event = _event(status, ts_data, actions) + self.events.append(event) + return event + + def _auto_extend_oracle(self, tablespace_name: str) -> list[dict[str, Any]]: + """Attempt to auto-extend or add datafiles for an Oracle tablespace.""" + actions: list[dict[str, Any]] = [] + + df_result = self.db_client.execute_query( + _ORACLE_DATAFILES_SQL.replace(":ts_name", f"'{tablespace_name}'") + ) + if "error" in df_result: + actions.append( + { + "tablespace": tablespace_name, + "action": "error", + "detail": f"Failed to query datafiles: {df_result['error']}", + } + ) + return actions + + datafiles = df_result.get("rows", []) + extended_any = False + + for df in datafiles: + file_name = df.get("FILE_NAME") or df.get("file_name", "") + max_size_mb = float(df.get("MAX_SIZE_MB") or df.get("max_size_mb", 0)) + autoext = df.get("AUTOEXTENSIBLE") or df.get("autoextensible", "NO") + + if autoext == "YES" and max_size_mb >= self.max_file_size_mb: + continue + + if autoext != "YES" or max_size_mb < self.max_file_size_mb: + max_mb = int(self.max_file_size_mb) + sql = ( + f"ALTER DATABASE DATAFILE '{file_name}' " + f"AUTOEXTEND ON MAXSIZE {max_mb}M" + ) + stmt_result = self.db_client.execute_statement(sql) + if stmt_result.get("success"): + actions.append( + { + "tablespace": tablespace_name, + "action": "autoextend_enabled", + "file": file_name, + "max_size_mb": max_mb, + "sql": sql, + } + ) + extended_any = True + else: + actions.append( + { + "tablespace": tablespace_name, + "action": "autoextend_failed", + "file": file_name, + "error": stmt_result.get("error", "unknown"), + "sql": sql, + } + ) + + if not 
extended_any: + add_sql = ( + f"ALTER TABLESPACE {tablespace_name} ADD DATAFILE " + f"SIZE 1024M AUTOEXTEND ON MAXSIZE {int(self.max_file_size_mb)}M" + ) + stmt_result = self.db_client.execute_statement(add_sql) + if stmt_result.get("success"): + actions.append( + { + "tablespace": tablespace_name, + "action": "datafile_added", + "sql": add_sql, + } + ) + else: + dir_path = self._derive_datafile_dir(datafiles) + if dir_path: + new_name = os.path.join( + dir_path, + f"{tablespace_name.lower()}_auto_{len(datafiles) + 1:02d}.dbf", + ) + add_sql2 = ( + f"ALTER TABLESPACE {tablespace_name} ADD DATAFILE " + f"'{new_name}' SIZE 1024M AUTOEXTEND ON " + f"MAXSIZE {int(self.max_file_size_mb)}M" + ) + stmt_result2 = self.db_client.execute_statement(add_sql2) + if stmt_result2.get("success"): + actions.append( + { + "tablespace": tablespace_name, + "action": "datafile_added", + "file": new_name, + "sql": add_sql2, + } + ) + else: + actions.append( + { + "tablespace": tablespace_name, + "action": "add_datafile_failed", + "error": stmt_result2.get("error", "unknown"), + "sql": add_sql2, + } + ) + else: + actions.append( + { + "tablespace": tablespace_name, + "action": "add_datafile_failed", + "error": stmt_result.get("error", "unknown"), + "sql": add_sql, + } + ) + + return actions + + @staticmethod + def _derive_datafile_dir(datafiles: list[dict[str, Any]]) -> str: + """Derive directory from existing datafiles for new file placement.""" + for df in datafiles: + fname = df.get("FILE_NAME") or df.get("file_name", "") + if fname: + return os.path.dirname(fname) + return "" + + # -- PostgreSQL checks --------------------------------------------------- + + def _check_postgresql(self) -> dict[str, Any]: + ts_result = self.db_client.execute_query(_PG_TABLESPACE_SQL) + db_result = self.db_client.execute_query(_PG_DATABASE_SIZE_SQL) + tbl_result = self.db_client.execute_query(_PG_TABLE_SIZE_SQL) + + ts_data: list[dict[str, Any]] = [] + actions: list[dict[str, Any]] = [] + + if "error" 
not in ts_result: + ts_data.extend(ts_result.get("rows", [])) + if "error" not in db_result: + ts_data.append({"_section": "databases", "rows": db_result.get("rows", [])}) + if "error" not in tbl_result: + ts_data.append({"_section": "tables", "rows": tbl_result.get("rows", [])}) + + for err_result in (ts_result, db_result, tbl_result): + if "error" in err_result: + actions.append( + { + "action": "query_error", + "error": err_result["error"], + } + ) + + status = "ok" if not actions else "warning" + event = _event(status, ts_data, actions) + self.events.append(event) + return event diff --git a/tools/pg-assistant/db_client.py b/tools/pg-assistant/db_client.py index 8357f86..a110b7e 100644 --- a/tools/pg-assistant/db_client.py +++ b/tools/pg-assistant/db_client.py @@ -1,17 +1,71 @@ -"""Direct PostgreSQL database client using psycopg2.""" +"""Database client supporting PostgreSQL and Oracle connections.""" +import abc import logging import time from typing import Any, Optional -import psycopg2 -import psycopg2.extras - logger = logging.getLogger(__name__) +DB_TYPE_POSTGRESQL = "postgresql" +DB_TYPE_ORACLE = "oracle" +SUPPORTED_DB_TYPES = (DB_TYPE_POSTGRESQL, DB_TYPE_ORACLE) + +# Conditional imports -- only the driver for the chosen DB type is required. +try: + import psycopg2 + import psycopg2.extras +except ImportError: + psycopg2 = None # type: ignore[assignment] + +try: + import oracledb +except ImportError: + oracledb = None # type: ignore[assignment] + -class DBClient: - """Client for direct PostgreSQL database connections.""" +# --------------------------------------------------------------------------- +# Abstract base +# --------------------------------------------------------------------------- +class BaseDBClient(abc.ABC): + """Common interface for all database clients.""" + + @abc.abstractmethod + def connect(self) -> None: ... + + @abc.abstractmethod + def disconnect(self) -> None: ... 
+ + @property + @abc.abstractmethod + def is_connected(self) -> bool: ... + + @property + @abc.abstractmethod + def db_type(self) -> str: ... + + @abc.abstractmethod + def execute_query(self, sql: str) -> dict[str, Any]: + """Execute a SELECT query and return columns/rows/row_count/elapsed_ms.""" + + @abc.abstractmethod + def execute_statement(self, sql: str) -> dict[str, Any]: + """Execute DDL/DML (no result set). Returns success/error/elapsed_ms.""" + + @abc.abstractmethod + def get_schema( + self, schema_name: str = "" + ) -> Optional[dict[str, list[dict[str, str]]]]: ... + + @abc.abstractmethod + def get_connection_info(self) -> str: ... + + +# --------------------------------------------------------------------------- +# PostgreSQL +# --------------------------------------------------------------------------- +class PostgreSQLClient(BaseDBClient): + """Client for PostgreSQL via psycopg2.""" def __init__( self, @@ -22,7 +76,12 @@ def __init__( password: str, sslmode: str = "prefer", ) -> None: - self.conn_params = { + if psycopg2 is None: + raise ImportError( + "psycopg2 is required for PostgreSQL connections. " + "Install it with: pip install psycopg2-binary" + ) + self.conn_params: dict[str, Any] = { "host": host, "port": port, "dbname": database, @@ -30,14 +89,13 @@ def __init__( "password": password, "sslmode": sslmode, } - self._conn: Optional[psycopg2.extensions.connection] = None + self._conn: Any = None - def connect(self) -> None: - """Establish a connection to PostgreSQL. + @property + def db_type(self) -> str: + return DB_TYPE_POSTGRESQL - Raises: - ConnectionError: If the database is unreachable. 
- """ + def connect(self) -> None: try: self._conn = psycopg2.connect(**self.conn_params) self._conn.autocommit = True @@ -48,43 +106,27 @@ def connect(self) -> None: self.conn_params["dbname"], ) except psycopg2.OperationalError as exc: - logger.error("Failed to connect to PostgreSQL: %s", exc) raise ConnectionError(f"Cannot connect to PostgreSQL: {exc}") from exc def disconnect(self) -> None: - """Close the database connection.""" if self._conn and not self._conn.closed: self._conn.close() logger.info("Disconnected from PostgreSQL") @property def is_connected(self) -> bool: - """Check whether the connection is active.""" if self._conn is None or self._conn.closed: return False try: with self._conn.cursor() as cur: cur.execute("SELECT 1") return True - except psycopg2.Error: + except Exception: return False def execute_query(self, sql: str) -> dict[str, Any]: - """Execute a SQL query and return results. - - Args: - sql: The SQL query string to execute. - - Returns: - A dict with 'columns', 'rows', 'row_count', and 'elapsed_ms'. - - Raises: - ConnectionError: If not connected to the database. - RuntimeError: If the query fails. - """ if not self.is_connected: raise ConnectionError("Not connected to PostgreSQL. Please connect first.") - start = time.monotonic() try: with self._conn.cursor( @@ -102,10 +144,25 @@ def execute_query(self, sql: str) -> dict[str, Any]: "row_count": len(rows), "elapsed_ms": round(elapsed * 1000, 2), } - except psycopg2.Error as exc: + except Exception as exc: elapsed = time.monotonic() - start logger.error("Query execution failed: %s", exc) + return {"error": str(exc).strip(), "elapsed_ms": round(elapsed * 1000, 2)} + + def execute_statement(self, sql: str) -> dict[str, Any]: + if not self.is_connected: + raise ConnectionError("Not connected to PostgreSQL. 
Please connect first.") + start = time.monotonic() + try: + with self._conn.cursor() as cur: + cur.execute(sql) + elapsed = time.monotonic() - start + return {"success": True, "elapsed_ms": round(elapsed * 1000, 2)} + except Exception as exc: + elapsed = time.monotonic() - start + logger.error("Statement execution failed: %s", exc) return { + "success": False, "error": str(exc).strip(), "elapsed_ms": round(elapsed * 1000, 2), } @@ -113,15 +170,6 @@ def execute_query(self, sql: str) -> dict[str, Any]: def get_schema( self, schema_name: str = "public" ) -> Optional[dict[str, list[dict[str, str]]]]: - """Retrieve database schema metadata. - - Args: - schema_name: The PostgreSQL schema to inspect. - - Returns: - Schema metadata dict mapping table names to column info lists, - or None on failure. - """ sql = """ SELECT t.table_name, @@ -139,14 +187,13 @@ def get_schema( """ if not self.is_connected: return None - try: with self._conn.cursor( cursor_factory=psycopg2.extras.RealDictCursor ) as cur: cur.execute(sql, (schema_name,)) rows = cur.fetchall() - except psycopg2.Error as exc: + except Exception as exc: logger.warning("Failed to fetch schema: %s", exc) return None @@ -159,16 +206,187 @@ def get_schema( "is_nullable": row["is_nullable"], "column_default": row["column_default"] or "", } - if table not in schema: - schema[table] = [] - schema[table].append(col_info) + schema.setdefault(table, []).append(col_info) + return schema + + def get_connection_info(self) -> str: + p = self.conn_params + return f"{p['user']}@{p['host']}:{p['port']}/{p['dbname']}" + + +# --------------------------------------------------------------------------- +# Oracle +# --------------------------------------------------------------------------- +class OracleClient(BaseDBClient): + """Client for Oracle via python-oracledb (thin mode, no Oracle Client needed).""" + + def __init__( + self, + host: str, + port: int, + service_name: str, + user: str, + password: str, + ) -> None: + if oracledb 
is None: + raise ImportError( + "oracledb is required for Oracle connections. " + "Install it with: pip install oracledb" + ) + self._host = host + self._port = port + self._service_name = service_name + self._user = user + self._password = password + self._dsn = f"{host}:{port}/{service_name}" + self._conn: Any = None + + @property + def db_type(self) -> str: + return DB_TYPE_ORACLE + def connect(self) -> None: + try: + self._conn = oracledb.connect( + user=self._user, password=self._password, dsn=self._dsn + ) + logger.info("Connected to Oracle at %s", self._dsn) + except oracledb.Error as exc: + raise ConnectionError(f"Cannot connect to Oracle: {exc}") from exc + + def disconnect(self) -> None: + if self._conn is not None: + try: + self._conn.close() + logger.info("Disconnected from Oracle") + except Exception: + pass + self._conn = None + + @property + def is_connected(self) -> bool: + if self._conn is None: + return False + try: + with self._conn.cursor() as cur: + cur.execute("SELECT 1 FROM DUAL") + return True + except Exception: + return False + + def execute_query(self, sql: str) -> dict[str, Any]: + if not self.is_connected: + raise ConnectionError("Not connected to Oracle. 
Please connect first.") + start = time.monotonic() + try: + with self._conn.cursor() as cur: + cur.execute(sql) + if cur.description: + columns = [desc[0] for desc in cur.description] + raw_rows = cur.fetchall() + elapsed = time.monotonic() - start + rows = [dict(zip(columns, r)) for r in raw_rows] + return { + "columns": columns, + "rows": rows, + "row_count": len(rows), + "elapsed_ms": round(elapsed * 1000, 2), + } + elapsed = time.monotonic() - start + return { + "columns": [], + "rows": [], + "row_count": 0, + "elapsed_ms": round(elapsed * 1000, 2), + } + except Exception as exc: + elapsed = time.monotonic() - start + logger.error("Query execution failed: %s", exc) + return {"error": str(exc).strip(), "elapsed_ms": round(elapsed * 1000, 2)} + + def execute_statement(self, sql: str) -> dict[str, Any]: + if not self.is_connected: + raise ConnectionError("Not connected to Oracle. Please connect first.") + start = time.monotonic() + try: + with self._conn.cursor() as cur: + cur.execute(sql) + self._conn.commit() + elapsed = time.monotonic() - start + return {"success": True, "elapsed_ms": round(elapsed * 1000, 2)} + except Exception as exc: + elapsed = time.monotonic() - start + logger.error("Statement execution failed: %s", exc) + return { + "success": False, + "error": str(exc).strip(), + "elapsed_ms": round(elapsed * 1000, 2), + } + + def get_schema( + self, schema_name: str = "" + ) -> Optional[dict[str, list[dict[str, str]]]]: + if not schema_name: + schema_name = self._user.upper() + sql = """ + SELECT table_name, column_name, data_type, nullable, data_default + FROM all_tab_columns + WHERE owner = :owner + ORDER BY table_name, column_id + """ + if not self.is_connected: + return None + try: + with self._conn.cursor() as cur: + cur.execute(sql, {"owner": schema_name}) + raw_rows = cur.fetchall() + except Exception as exc: + logger.warning("Failed to fetch Oracle schema: %s", exc) + return None + + schema: dict[str, list[dict[str, str]]] = {} + for row in 
raw_rows: + table = row[0] + col_info = { + "column_name": row[1], + "data_type": row[2], + "is_nullable": "YES" if row[3] == "Y" else "NO", + "column_default": str(row[4]) if row[4] else "", + } + schema.setdefault(table, []).append(col_info) return schema def get_connection_info(self) -> str: - """Return a display-friendly connection string (password masked).""" - return ( - f"{self.conn_params['user']}@" - f"{self.conn_params['host']}:{self.conn_params['port']}/" - f"{self.conn_params['dbname']}" + return f"{self._user}@{self._dsn}" + + +# --------------------------------------------------------------------------- +# Factory +# --------------------------------------------------------------------------- +def create_db_client(db_type: str, **kwargs: Any) -> BaseDBClient: + """Create a database client for the given type. + + Args: + db_type: One of 'postgresql' or 'oracle'. + **kwargs: Connection parameters forwarded to the client constructor. + """ + if db_type == DB_TYPE_POSTGRESQL: + return PostgreSQLClient( + host=kwargs["host"], + port=kwargs["port"], + database=kwargs["database"], + user=kwargs["user"], + password=kwargs["password"], + sslmode=kwargs.get("sslmode", "prefer"), + ) + if db_type == DB_TYPE_ORACLE: + return OracleClient( + host=kwargs["host"], + port=kwargs["port"], + service_name=kwargs["service_name"], + user=kwargs["user"], + password=kwargs["password"], ) + raise ValueError( + f"Unsupported database type: {db_type!r}. 
Supported: {SUPPORTED_DB_TYPES}" + ) diff --git a/tools/pg-assistant/profile_manager.py b/tools/pg-assistant/profile_manager.py index 456d65b..8a1c2b4 100644 --- a/tools/pg-assistant/profile_manager.py +++ b/tools/pg-assistant/profile_manager.py @@ -1,4 +1,4 @@ -"""Database connection profile manager — save and load profiles as JSON.""" +"""Database connection profile manager -- save and load profiles as JSON.""" import json import logging @@ -51,43 +51,42 @@ def list_profiles(self) -> list[str]: return list(self._load_all().keys()) def get_profile(self, name: str) -> Optional[dict[str, Any]]: - """Retrieve a saved profile by name. - - Args: - name: The profile name. - - Returns: - A dict with connection parameters, or None if not found. - """ + """Retrieve a saved profile by name.""" profiles = self._load_all() return profiles.get(name) def save_profile( self, name: str, + db_type: str, host: str, port: int, - database: str, user: str, password: str, + database: str = "", + service_name: str = "", sslmode: str = "prefer", ) -> None: """Save a database connection profile. Args: name: A friendly name for the profile. - host: PostgreSQL host. - port: PostgreSQL port. - database: Database name. + db_type: 'postgresql' or 'oracle'. + host: Database host. + port: Database port. user: Database user. password: Database password. - sslmode: SSL mode (default: prefer). + database: Database name (PostgreSQL). + service_name: Service name (Oracle). + sslmode: SSL mode (PostgreSQL only, default: prefer). """ profiles = self._load_all() profiles[name] = { + "db_type": db_type, "host": host, "port": port, "database": database, + "service_name": service_name, "user": user, "password": password, "sslmode": sslmode, @@ -98,9 +97,6 @@ def save_profile( def delete_profile(self, name: str) -> bool: """Delete a saved profile. - Args: - name: The profile name to delete. - Returns: True if deleted, False if not found. 
""" diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt index 7ef45a6..8efff7e 100644 --- a/tools/pg-assistant/requirements.txt +++ b/tools/pg-assistant/requirements.txt @@ -1,4 +1,5 @@ requests>=2.31.0,<3.0.0 psycopg2-binary>=2.9.0,<3.0.0 +oracledb>=2.0.0,<3.0.0 streamlit>=1.28.0,<2.0.0 pandas>=2.0.0,<3.0.0 diff --git a/tools/pg-assistant/sql_generator.py b/tools/pg-assistant/sql_generator.py index 42dd313..798df2f 100644 --- a/tools/pg-assistant/sql_generator.py +++ b/tools/pg-assistant/sql_generator.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) -SYSTEM_PROMPT = ( +_PG_SYSTEM_PROMPT = ( "You are a PostgreSQL expert. You receive natural language questions about " "a PostgreSQL database and return ONLY valid SQL SELECT queries. " "Rules:\n" @@ -21,6 +21,20 @@ "-- CANNOT_GENERATE" ) +_ORA_SYSTEM_PROMPT = ( + "You are an Oracle Database expert. You receive natural language questions about " + "an Oracle database and return ONLY valid SQL SELECT queries. " + "Rules:\n" + "- Return ONLY the SQL query, nothing else.\n" + "- Do NOT include explanations, comments, or markdown formatting.\n" + "- Do NOT use DROP, DELETE, TRUNCATE, UPDATE, INSERT, ALTER, CREATE, or GRANT.\n" + "- Only generate SELECT statements.\n" + "- Use Oracle SQL syntax (e.g. ROWNUM, FETCH FIRST, NVL, DUAL, etc.).\n" + "- Always terminate the query with a semicolon.\n" + "- If the question cannot be answered with a SELECT query, respond with: " + "-- CANNOT_GENERATE" +) + DANGEROUS_KEYWORDS = frozenset( { "DROP", @@ -54,17 +68,21 @@ class SQLGenerator: def __init__( self, llm_client: LLMClient, + db_type: str = "postgresql", schema_metadata: Optional[dict[str, Any]] = None, ) -> None: self.llm_client = llm_client + self.db_type = db_type self.schema_metadata = schema_metadata - def update_schema(self, schema_metadata: dict[str, Any]) -> None: - """Update the schema metadata used for prompt context. 
+ @property + def system_prompt(self) -> str: + if self.db_type == "oracle": + return _ORA_SYSTEM_PROMPT + return _PG_SYSTEM_PROMPT - Args: - schema_metadata: Dict mapping table names to column info lists. - """ + def update_schema(self, schema_metadata: dict[str, Any]) -> None: + """Update the schema metadata used for prompt context.""" self.schema_metadata = schema_metadata logger.info("Schema metadata updated: %d tables", len(schema_metadata)) @@ -99,7 +117,7 @@ def generate_sql(self, user_query: str) -> str: try: raw_response = self.llm_client.generate( prompt=retry_prompt, - system_prompt=SYSTEM_PROMPT, + system_prompt=self.system_prompt, ) except (ConnectionError, RuntimeError) as exc: logger.error("LLM request failed: %s", exc) @@ -130,18 +148,12 @@ def generate_sql(self, user_query: str) -> str: ) def _build_prompt(self, user_query: str) -> str: - """Build the full prompt including schema context. - - Args: - user_query: The natural language question. - - Returns: - The complete prompt string. - """ + """Build the full prompt including schema context.""" parts = [] if self.schema_metadata: - parts.append("Database schema:") + db_label = "Oracle" if self.db_type == "oracle" else "PostgreSQL" + parts.append(f"Database schema ({db_label}):") for table_name, columns in self.schema_metadata.items(): col_defs = [] for col in columns: @@ -164,16 +176,7 @@ def _build_prompt(self, user_query: str) -> str: @staticmethod def _extract_sql(raw_response: str) -> str: - """Extract clean SQL from the LLM response. - - Strips markdown code blocks, comments, and extra whitespace. - - Args: - raw_response: The raw LLM output. - - Returns: - A cleaned SQL string. - """ + """Extract clean SQL from the LLM response.""" text = raw_response.strip() # Remove markdown code fences @@ -204,9 +207,6 @@ def _extract_sql(raw_response: str) -> str: def _validate_sql(sql: str) -> None: """Validate that the SQL is a safe SELECT query. - Args: - sql: The SQL query to validate. 
- Raises: UnsafeSQLError: If the query contains dangerous keywords. ValueError: If the query is not a valid SELECT statement. From 94e3973007c2c6f030e47ad3295eca8ca346bce0 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 5 Apr 2026 02:55:05 +0000 Subject: [PATCH 05/19] Increase Ollama timeout to 300s and add configurable timeout slider in UI - Default timeout increased from 120s to 300s (first model load is slow) - Added timeout slider (60-600s) in Ollama Settings sidebar - Improved timeout error message with troubleshooting hint --- tools/pg-assistant/app.py | 7 ++++++- tools/pg-assistant/llm_client.py | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index d830787..be9ce74 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -74,6 +74,9 @@ def _connected_db_type() -> str: st.subheader("🤖 Ollama Settings") ollama_url = st.text_input("Ollama URL", value="http://localhost:11434") ollama_model = st.text_input("Model", value="codellama") + ollama_timeout = st.slider( + "Request timeout (seconds)", 60, 600, 300, step=30, key="ollama_timeout" + ) if st.button("Test Ollama Connection"): test_llm = LLMClient(base_url=ollama_url, model=ollama_model) @@ -173,7 +176,9 @@ def _connected_db_type() -> str: db.connect() st.session_state.db_client = db - llm = LLMClient(base_url=ollama_url, model=ollama_model) + llm = LLMClient( + base_url=ollama_url, model=ollama_model, timeout=ollama_timeout + ) st.session_state.llm_client = llm gen = SQLGenerator(llm_client=llm, db_type=selected_db_type) st.session_state.sql_generator = gen diff --git a/tools/pg-assistant/llm_client.py b/tools/pg-assistant/llm_client.py index bb34846..4f3e2c8 100644 --- a/tools/pg-assistant/llm_client.py +++ b/tools/pg-assistant/llm_client.py @@ -10,7 +10,7 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" DEFAULT_MODEL = "codellama" -DEFAULT_TIMEOUT 
= 120 +DEFAULT_TIMEOUT = 300 class LLMClient: @@ -67,7 +67,9 @@ def generate(self, prompt: str, system_prompt: str = "") -> str: except requests.Timeout as exc: logger.error("Ollama request timed out after %ds", self.timeout) raise RuntimeError( - f"Ollama request timed out after {self.timeout}s." + f"Ollama request timed out after {self.timeout}s. " + "Try increasing the timeout in the sidebar settings, or ensure " + "the model is fully loaded (first request is slower)." ) from exc elapsed = time.monotonic() - start From 3cd866190294e19d240ad26fe9bd2f9982d9e03f Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 5 Apr 2026 03:26:29 +0000 Subject: [PATCH 06/19] Fix Oracle SQL compatibility and add auto-retry on DB errors - Update Oracle system prompt to use ROWNUM instead of FETCH FIRST/OFFSET (compatible with Oracle 11g+, fixes ORA-00933) - Increase MAX_RETRIES from 2 to 3 for SQL generation - Add auto-retry in Query tab: when a query fails with a DB error, the error is fed back to the LLM to regenerate corrected SQL automatically - Explicit Oracle syntax guidance: NVL, DUAL, TO_DATE, subquery for ORDER BY + ROWNUM --- tools/pg-assistant/app.py | 119 +++++++++++++++++----------- tools/pg-assistant/sql_generator.py | 7 +- 2 files changed, 78 insertions(+), 48 deletions(-) diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index be9ce74..eb7ca9e 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -321,54 +321,81 @@ def _connected_db_type() -> str: gen_elapsed = 0 if sql: - st.subheader("Generated SQL") - st.code(sql, language="sql") - st.caption(f"Generated in {gen_elapsed:.2f}s") - - with st.spinner("Executing query..."): - result = db.execute_query(sql) - - if "error" in result: - st.error(f"**Query Error:** {result['error']}") - st.session_state.query_history.append( - { - "question": user_question.strip(), - "sql": sql, - "status": "error", - "error": 
result["error"], - "elapsed_ms": result.get("elapsed_ms", 0), - } - ) - else: - rows = result.get("rows", []) - row_count = result.get("row_count", 0) - elapsed_ms = result.get("elapsed_ms", 0) - - st.subheader("Results") - if rows: - df = pd.DataFrame(rows) - st.dataframe(df, use_container_width=True) - st.caption(f"{row_count} row(s) returned in {elapsed_ms}ms") - - csv = df.to_csv(index=False) - st.download_button( - "📥 Download CSV", - csv, - file_name="query_results.csv", - mime="text/csv", + max_exec_retries = 2 + for exec_attempt in range(1, max_exec_retries + 1): + st.subheader("Generated SQL") + st.code(sql, language="sql") + st.caption(f"Generated in {gen_elapsed:.2f}s") + + with st.spinner("Executing query..."): + result = db.execute_query(sql) + + if "error" in result and exec_attempt < max_exec_retries: + db_error = result["error"] + st.warning( + f"**Query failed** (attempt {exec_attempt}): {db_error}\n\n" + "Regenerating SQL with error feedback..." + ) + with st.spinner("Regenerating SQL with error context..."): + retry_start = time.monotonic() + try: + sql = generator.generate_sql( + f"{user_question.strip()}\n\n" + f"IMPORTANT: The previous SQL failed with this " + f"database error: {db_error}\n" + f"Previous failing SQL: {sql}\n" + f"Please generate a corrected query that avoids " + f"this error." 
+ ) + gen_elapsed = time.monotonic() - retry_start + except (UnsafeSQLError, SQLGenerationError) as exc: + st.error(f"**Retry failed:** {exc}") + sql = None + break + continue + + if "error" in result: + st.error(f"**Query Error:** {result['error']}") + st.session_state.query_history.append( + { + "question": user_question.strip(), + "sql": sql, + "status": "error", + "error": result["error"], + "elapsed_ms": result.get("elapsed_ms", 0), + } ) else: - st.info("Query returned no results.") - - st.session_state.query_history.append( - { - "question": user_question.strip(), - "sql": sql, - "status": "success", - "row_count": row_count, - "elapsed_ms": elapsed_ms, - } - ) + rows = result.get("rows", []) + row_count = result.get("row_count", 0) + elapsed_ms = result.get("elapsed_ms", 0) + + st.subheader("Results") + if rows: + df = pd.DataFrame(rows) + st.dataframe(df, use_container_width=True) + st.caption(f"{row_count} row(s) returned in {elapsed_ms}ms") + + csv = df.to_csv(index=False) + st.download_button( + "📥 Download CSV", + csv, + file_name="query_results.csv", + mime="text/csv", + ) + else: + st.info("Query returned no results.") + + st.session_state.query_history.append( + { + "question": user_question.strip(), + "sql": sql, + "status": "success", + "row_count": row_count, + "elapsed_ms": elapsed_ms, + } + ) + break # ---- Schema tab ----------------------------------------------------------- with tab_schema: diff --git a/tools/pg-assistant/sql_generator.py b/tools/pg-assistant/sql_generator.py index 798df2f..00985af 100644 --- a/tools/pg-assistant/sql_generator.py +++ b/tools/pg-assistant/sql_generator.py @@ -29,7 +29,10 @@ "- Do NOT include explanations, comments, or markdown formatting.\n" "- Do NOT use DROP, DELETE, TRUNCATE, UPDATE, INSERT, ALTER, CREATE, or GRANT.\n" "- Only generate SELECT statements.\n" - "- Use Oracle SQL syntax (e.g. 
ROWNUM, FETCH FIRST, NVL, DUAL, etc.).\n" + "- Use classic Oracle SQL syntax compatible with Oracle 11g and above.\n" + "- For limiting rows, ALWAYS use WHERE ROWNUM <= N (wrap in a subquery if " + "ordering is needed). NEVER use FETCH FIRST or OFFSET/FETCH.\n" + "- Use NVL instead of COALESCE, DUAL for dummy selects, TO_DATE for dates.\n" "- Always terminate the query with a semicolon.\n" "- If the question cannot be answered with a SELECT query, respond with: " "-- CANNOT_GENERATE" @@ -51,7 +54,7 @@ } ) -MAX_RETRIES = 2 +MAX_RETRIES = 3 class SQLGenerationError(Exception): From 3b30d6499e881989d70f7903ba10ad06661dc870 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 5 Apr 2026 03:39:21 +0000 Subject: [PATCH 07/19] Add snap ID selectors, report upload, and pg_stat_statements analysis - Oracle: AWR snap ID range selector (queries DBA_HIST_SNAPSHOT, collects DBA_HIST_SQLSTAT/SYSTEM_EVENT/SYSSTAT for selected range) - PostgreSQL: pgProfile sample ID range selector (queries profile.samples, collects profile.stmt_list/wait_sampling_total for selected range) - PostgreSQL: latest pg_stat_statements one-click analysis with extension check - Both: file upload for AWR HTML/text, pg_stat_statements CSV, pgProfile reports - Auto Analyse tab now has radio button mode selector per DB type - Parsed report text shown in expander when no raw data available --- tools/pg-assistant/app.py | 262 ++++++++++++++++++--- tools/pg-assistant/auto_analyse.py | 353 ++++++++++++++++++++++++++++- 2 files changed, 585 insertions(+), 30 deletions(-) diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index eb7ca9e..7073f9d 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -569,44 +569,248 @@ def _connected_db_type() -> str: db_client = st.session_state.db_client llm_client = st.session_state.llm_client db_label = db_client.db_type.upper() + is_oracle = db_client.db_type == DB_TYPE_ORACLE 
st.markdown( f"Collects performance data from **{db_label}** " - f"({'AWR / V$ views' if db_client.db_type == DB_TYPE_ORACLE else 'pg_stat_statements / pg_stat_*'}) " + f"({'AWR / V$ views' if is_oracle else 'pg_stat_statements / pg_stat_* / pgProfile'}) " "and generates an AI-powered summary with action plan." ) - acol1, acol2 = st.columns(2) - with acol1: - if st.button("📈 Collect Data Only", use_container_width=True): - analyser = PerformanceAnalyser( - db_client=db_client, llm_client=llm_client + # Analysis mode selector + if is_oracle: + analyse_mode = st.radio( + "Analysis mode", + [ + "Live V$ views", + "AWR Snap ID range", + "Upload report file", + ], + horizontal=True, + key="analyse_mode", + ) + else: + analyse_mode = st.radio( + "Analysis mode", + [ + "Live pg_stat_* views", + "pgProfile Snap ID range", + "Latest pg_stat_statements", + "Upload report file", + ], + horizontal=True, + key="analyse_mode", + ) + + st.divider() + + # ------- Mode: Live V$ / pg_stat_* ----------------------------------- + if analyse_mode in ("Live V$ views", "Live pg_stat_* views"): + acol1, acol2 = st.columns(2) + with acol1: + if st.button("📈 Collect Data Only", use_container_width=True): + analyser = PerformanceAnalyser( + db_client=db_client, llm_client=llm_client + ) + with st.spinner("Collecting performance data..."): + raw_data = analyser.collect_data() + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = { + "raw_data": raw_data, + "analysis": None, + } + st.success("Data collected!") + + with acol2: + if st.button( + "🧠 Full Analysis (Data + LLM)", + use_container_width=True, + type="primary", + ): + analyser = PerformanceAnalyser( + db_client=db_client, llm_client=llm_client + ) + with st.spinner("Collecting data and running LLM analysis..."): + result = analyser.analyse() + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = result + st.success("Analysis complete!") + + # ------- Mode: AWR Snap ID range (Oracle) 
---------------------------- + elif analyse_mode == "AWR Snap ID range": + analyser = PerformanceAnalyser(db_client=db_client, llm_client=llm_client) + st.markdown("Select an AWR snapshot range from `DBA_HIST_SNAPSHOT`.") + + if st.button("🔄 Load AWR Snapshots"): + with st.spinner("Querying DBA_HIST_SNAPSHOT..."): + snaps = analyser.list_awr_snapshots() + st.session_state["_awr_snapshots"] = snaps + + snaps = st.session_state.get("_awr_snapshots", []) + if snaps: + snap_df = pd.DataFrame(snaps) + st.dataframe( + snap_df, use_container_width=True, hide_index=True, height=250 + ) + snap_ids = [int(s["snap_id"]) for s in snaps] + scol1, scol2 = st.columns(2) + with scol1: + begin_snap = st.selectbox( + "Begin Snap ID", + sorted(snap_ids), + index=max(0, len(snap_ids) - 2), + key="awr_begin", + ) + with scol2: + end_snap = st.selectbox( + "End Snap ID", + sorted(snap_ids), + index=len(snap_ids) - 1, + key="awr_end", + ) + + if st.button( + "🧠 Analyse AWR Range", + use_container_width=True, + type="primary", + ): + if begin_snap >= end_snap: + st.error("Begin Snap ID must be less than End Snap ID.") + else: + with st.spinner( + f"Collecting AWR data for snaps {begin_snap}–{end_snap}..." + ): + result = analyser.analyse_awr_snaps(begin_snap, end_snap) + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = result + st.success("AWR analysis complete!") + else: + st.info("Click 'Load AWR Snapshots' to list available snapshot IDs.") + + # ------- Mode: pgProfile Snap ID range (PostgreSQL) ------------------ + elif analyse_mode == "pgProfile Snap ID range": + analyser = PerformanceAnalyser(db_client=db_client, llm_client=llm_client) + st.markdown( + "Select a pgProfile sample range from `profile.samples`. " + "Requires the [pgProfile](https://github.com/zubkov-andrei/pg_profile) extension." 
+ ) + + if st.button("🔄 Load pgProfile Samples"): + with st.spinner("Querying profile.samples..."): + samples = analyser.list_pgprofile_samples() + if not samples: + st.warning( + "No pgProfile samples found. Is the pgProfile extension " + "installed and configured?" + ) + st.session_state["_pgprofile_samples"] = samples + + samples = st.session_state.get("_pgprofile_samples", []) + if samples: + samp_df = pd.DataFrame(samples) + st.dataframe( + samp_df, use_container_width=True, hide_index=True, height=250 ) - with st.spinner("Collecting performance data..."): - raw_data = analyser.collect_data() - st.session_state.analyser = analyser - st.session_state["_last_analysis"] = { - "raw_data": raw_data, - "analysis": None, - } - st.success("Data collected!") - - with acol2: + sample_ids = [int(s["sample_id"]) for s in samples] + pcol1, pcol2 = st.columns(2) + with pcol1: + begin_sample = st.selectbox( + "Begin Sample ID", + sorted(sample_ids), + index=max(0, len(sample_ids) - 2), + key="pgp_begin", + ) + with pcol2: + end_sample = st.selectbox( + "End Sample ID", + sorted(sample_ids), + index=len(sample_ids) - 1, + key="pgp_end", + ) + + if st.button( + "🧠 Analyse pgProfile Range", + use_container_width=True, + type="primary", + ): + if begin_sample >= end_sample: + st.error("Begin Sample ID must be less than End Sample ID.") + else: + with st.spinner( + f"Collecting pgProfile data for samples " + f"{begin_sample}–{end_sample}..." 
+ ): + result = analyser.analyse_pgprofile_snaps( + begin_sample, end_sample + ) + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = result + st.success("pgProfile analysis complete!") + else: + st.info("Click 'Load pgProfile Samples' to list available sample IDs.") + + # ------- Mode: Latest pg_stat_statements (PostgreSQL) ---------------- + elif analyse_mode == "Latest pg_stat_statements": + analyser = PerformanceAnalyser(db_client=db_client, llm_client=llm_client) + st.markdown( + "Collects the **latest cumulative snapshot** from " + "`pg_stat_statements` plus table, database, bgwriter stats " + "and unused indexes." + ) + if st.button( - "🧠 Full Analysis (Data + LLM)", + "🧠 Analyse Latest pg_stat_statements", use_container_width=True, type="primary", ): - analyser = PerformanceAnalyser( - db_client=db_client, llm_client=llm_client - ) - with st.spinner("Collecting data and running LLM analysis..."): - result = analyser.analyse() - st.session_state.analyser = analyser - st.session_state["_last_analysis"] = result - st.success("Analysis complete!") + with st.spinner("Checking pg_stat_statements extension..."): + has_ext = analyser.check_pg_stat_statements() + if not has_ext: + st.error( + "pg_stat_statements extension is not installed. " + "Run `CREATE EXTENSION pg_stat_statements;` first." + ) + else: + with st.spinner( + "Collecting pg_stat_statements data and running LLM analysis..." + ): + result = analyser.analyse_pg_stat_latest() + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = result + st.success("pg_stat_statements analysis complete!") + + # ------- Mode: Upload report file ------------------------------------ + elif analyse_mode == "Upload report file": + st.markdown( + "Upload an **AWR report** (HTML/text), **pg_stat_statements CSV**, " + "or **pgProfile report** (HTML/text) for LLM-powered analysis." 
+ ) + uploaded_file = st.file_uploader( + "Choose a report file", + type=["html", "htm", "txt", "csv", "log"], + key="report_upload", + ) + if uploaded_file is not None: + if st.button( + "🧠 Analyse Uploaded Report", + use_container_width=True, + type="primary", + ): + analyser = PerformanceAnalyser( + db_client=db_client, llm_client=llm_client + ) + file_content = uploaded_file.getvalue().decode( + "utf-8", errors="replace" + ) + with st.spinner(f"Parsing and analysing {uploaded_file.name}..."): + result = analyser.analyse_uploaded_report( + file_content, uploaded_file.name + ) + st.session_state.analyser = analyser + st.session_state["_last_analysis"] = result + st.success("Report analysis complete!") - # Display analysis results + # ------- Display analysis results (shared across all modes) ---------- last = st.session_state.get("_last_analysis") if last: st.divider() @@ -620,7 +824,7 @@ def _connected_db_type() -> str: st.divider() st.subheader("Raw Performance Data") for section_name, section_data in raw.items(): - if section_name == "db_type": + if section_name in ("db_type", "snap_range", "sample_range"): continue label = section_name.replace("_", " ").title() with st.expander(f"📊 {label}"): @@ -635,6 +839,10 @@ def _connected_db_type() -> str: else: st.info("No data available.") + if last.get("report_text") and not raw: + with st.expander("📄 Parsed Report Text"): + st.text(last["report_text"][:5000]) + # ---- History tab ---------------------------------------------------------- with tab_history: st.subheader("Query History") diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index 737a3a7..3411e25 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -1,6 +1,15 @@ -"""Performance analysis for Oracle (AWR/V$) and PostgreSQL (pg_stat_statements).""" +"""Performance analysis for Oracle (AWR/V$) and PostgreSQL (pg_stat_statements). +Supports three analysis modes: +1. 
Live collection from V$/pg_stat_* views +2. AWR snap-ID based report generation (Oracle) +3. Uploaded report file parsing (AWR HTML/text, pg_stat_statements CSV, pgProfile) +""" + +import csv +import io import logging +import re from typing import Any from db_client import BaseDBClient, DB_TYPE_ORACLE, DB_TYPE_POSTGRESQL @@ -78,6 +87,121 @@ ) WHERE ROWNUM <= 20 """ +# --------------------------------------------------------------------------- +# Oracle AWR snapshot queries +# --------------------------------------------------------------------------- +_ORA_LIST_SNAPSHOTS = """ + SELECT + snap_id, + dbid, + instance_number, + TO_CHAR(begin_interval_time, 'YYYY-MM-DD HH24:MI') AS begin_time, + TO_CHAR(end_interval_time, 'YYYY-MM-DD HH24:MI') AS end_time + FROM dba_hist_snapshot + ORDER BY snap_id DESC +""" + +_ORA_AWR_TOP_SQL = """ + SELECT * FROM ( + SELECT + s.sql_id, + s.plan_hash_value, + SUM(s.elapsed_time_delta) / 1e6 AS elapsed_sec, + SUM(s.executions_delta) AS executions, + SUM(s.buffer_gets_delta) AS buffer_gets, + SUM(s.disk_reads_delta) AS disk_reads, + DBMS_LOB.SUBSTR(t.sql_text, 200, 1) AS sql_text + FROM dba_hist_sqlstat s + JOIN dba_hist_sqltext t ON s.sql_id = t.sql_id AND s.dbid = t.dbid + WHERE s.snap_id BETWEEN :begin_snap AND :end_snap + GROUP BY s.sql_id, s.plan_hash_value, + DBMS_LOB.SUBSTR(t.sql_text, 200, 1) + ORDER BY elapsed_sec DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_AWR_WAIT_EVENTS = """ + SELECT * FROM ( + SELECT + event_name AS event, + SUM(total_waits_fg) AS total_waits, + ROUND(SUM(time_waited_micro_fg) / 1e6, 2) AS time_waited_sec + FROM dba_hist_system_event + WHERE snap_id BETWEEN :begin_snap AND :end_snap + AND wait_class != 'Idle' + GROUP BY event_name + ORDER BY time_waited_sec DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_AWR_SYS_STATS = """ + SELECT + stat_name AS name, + SUM(value) AS value + FROM dba_hist_sysstat + WHERE snap_id BETWEEN :begin_snap AND :end_snap + AND stat_name IN ( + 'db block gets', 'consistent gets', 'physical 
reads', + 'redo size', 'sorts (memory)', 'sorts (disk)', + 'rows processed', 'parse count (total)', 'parse count (hard)', + 'execute count', 'user commits', 'user rollbacks' + ) + GROUP BY stat_name + ORDER BY stat_name +""" + +# --------------------------------------------------------------------------- +# PostgreSQL pgProfile snapshot queries +# --------------------------------------------------------------------------- +_PG_LIST_PGPROFILE_SAMPLES = """ + SELECT + sample_id, + sample_time::text AS sample_time, + server_name + FROM profile.samples + ORDER BY sample_id DESC + LIMIT 100 +""" + +_PG_PGPROFILE_TOP_SQL = """ + SELECT + queryid, + LEFT(query, 200) AS query_text, + calls, + ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, + ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec, + rows, + shared_blks_hit, + shared_blks_read + FROM profile.stmt_list sl + JOIN profile.sample_statements ss ON sl.queryid_md5 = ss.queryid_md5 + WHERE ss.sample_id BETWEEN {begin_sample} AND {end_sample} + ORDER BY total_exec_time DESC + LIMIT 20 +""" + +_PG_PGPROFILE_WAIT_EVENTS = """ + SELECT + event_type, + event, + SUM(tot_waited)::numeric AS total_waited_sec, + SUM(tot_waits) AS total_waits + FROM profile.wait_sampling_total + WHERE sample_id BETWEEN {begin_sample} AND {end_sample} + GROUP BY event_type, event + ORDER BY total_waited_sec DESC + LIMIT 20 +""" + +# --------------------------------------------------------------------------- +# PostgreSQL pg_stat_statements snapshot (latest cumulative) +# --------------------------------------------------------------------------- +_PG_STAT_STATEMENTS_EXISTS = """ + SELECT COUNT(*) AS cnt + FROM pg_extension + WHERE extname = 'pg_stat_statements' +""" + # --------------------------------------------------------------------------- # PostgreSQL performance queries # --------------------------------------------------------------------------- @@ -178,6 +302,8 @@ def __init__( self.db_client = db_client 
self.llm_client = llm_client + # -- public API ---------------------------------------------------------- + def collect_data(self) -> dict[str, Any]: """Collect raw performance data from the database.""" if self.db_client.db_type == DB_TYPE_ORACLE: @@ -187,8 +313,58 @@ def collect_data(self) -> dict[str, Any]: def analyse(self) -> dict[str, Any]: """Collect data, generate LLM analysis, and return everything.""" raw_data = self.collect_data() - report_text = self._format_report(raw_data) + return self._run_llm_analysis(raw_data) + + def analyse_awr_snaps(self, begin_snap: int, end_snap: int) -> dict[str, Any]: + """Collect AWR data for a snap-ID range and generate LLM analysis.""" + raw_data = self._collect_oracle_awr(begin_snap, end_snap) + return self._run_llm_analysis(raw_data) + + def analyse_uploaded_report( + self, file_content: str, file_name: str + ) -> dict[str, Any]: + """Parse an uploaded report file and generate LLM analysis.""" + parsed = parse_uploaded_report(file_content, file_name) + return self._run_llm_analysis_from_text(parsed) + + def list_awr_snapshots(self) -> list[dict[str, Any]]: + """Return available AWR snapshots from DBA_HIST_SNAPSHOT.""" + result = self.db_client.execute_query(_ORA_LIST_SNAPSHOTS) + if "error" in result: + return [] + return result.get("rows", []) + def list_pgprofile_samples(self) -> list[dict[str, Any]]: + """Return available pgProfile samples from profile.samples.""" + result = self.db_client.execute_query(_PG_LIST_PGPROFILE_SAMPLES) + if "error" in result: + return [] + return result.get("rows", []) + + def analyse_pgprofile_snaps( + self, begin_sample: int, end_sample: int + ) -> dict[str, Any]: + """Collect pgProfile data for a sample-ID range and run LLM analysis.""" + raw_data = self._collect_pgprofile(begin_sample, end_sample) + return self._run_llm_analysis(raw_data) + + def analyse_pg_stat_latest(self) -> dict[str, Any]: + """Collect latest pg_stat_statements data and run LLM analysis.""" + raw_data = 
self._collect_postgresql() + return self._run_llm_analysis(raw_data) + + def check_pg_stat_statements(self) -> bool: + """Check if pg_stat_statements extension is installed.""" + result = self.db_client.execute_query(_PG_STAT_STATEMENTS_EXISTS) + if "error" in result: + return False + rows = result.get("rows", []) + return bool(rows and int(rows[0].get("cnt", 0)) > 0) + + # -- internal helpers ---------------------------------------------------- + + def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]: + report_text = self._format_report(raw_data) try: llm_response = self.llm_client.generate( prompt=report_text, @@ -196,13 +372,26 @@ def analyse(self) -> dict[str, Any]: ) except (ConnectionError, RuntimeError) as exc: llm_response = f"LLM analysis failed: {exc}" - return { "raw_data": raw_data, "report_text": report_text, "analysis": llm_response, } + def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]: + try: + llm_response = self.llm_client.generate( + prompt=report_text, + system_prompt=ANALYSIS_SYSTEM_PROMPT, + ) + except (ConnectionError, RuntimeError) as exc: + llm_response = f"LLM analysis failed: {exc}" + return { + "raw_data": {}, + "report_text": report_text, + "analysis": llm_response, + } + # -- Oracle collection --------------------------------------------------- def _collect_oracle(self) -> dict[str, Any]: @@ -223,6 +412,51 @@ def _collect_oracle(self) -> dict[str, Any]: sections["db_type"] = DB_TYPE_ORACLE return sections + def _collect_oracle_awr(self, begin_snap: int, end_snap: int) -> dict[str, Any]: + """Collect AWR historical data between two snap IDs.""" + sections: dict[str, Any] = {} + snap_range = {":begin_snap": str(begin_snap), ":end_snap": str(end_snap)} + queries = { + "awr_top_sql": _ORA_AWR_TOP_SQL, + "awr_wait_events": _ORA_AWR_WAIT_EVENTS, + "awr_system_stats": _ORA_AWR_SYS_STATS, + } + for name, sql in queries.items(): + bound_sql = sql + for placeholder, val in snap_range.items(): + 
bound_sql = bound_sql.replace(placeholder, val) + result = self.db_client.execute_query(bound_sql) + if "error" in result: + sections[name] = {"error": result["error"]} + else: + sections[name] = result.get("rows", []) + sections["db_type"] = DB_TYPE_ORACLE + sections["snap_range"] = f"{begin_snap} - {end_snap}" + return sections + + # -- pgProfile collection ------------------------------------------------ + + def _collect_pgprofile(self, begin_sample: int, end_sample: int) -> dict[str, Any]: + """Collect pgProfile historical data between two sample IDs.""" + sections: dict[str, Any] = {} + queries = { + "pgprofile_top_sql": _PG_PGPROFILE_TOP_SQL.format( + begin_sample=begin_sample, end_sample=end_sample + ), + "pgprofile_wait_events": _PG_PGPROFILE_WAIT_EVENTS.format( + begin_sample=begin_sample, end_sample=end_sample + ), + } + for name, sql in queries.items(): + result = self.db_client.execute_query(sql) + if "error" in result: + sections[name] = {"error": result["error"]} + else: + sections[name] = result.get("rows", []) + sections["db_type"] = DB_TYPE_POSTGRESQL + sections["sample_range"] = f"{begin_sample} - {end_sample}" + return sections + # -- PostgreSQL collection ----------------------------------------------- def _collect_postgresql(self) -> dict[str, Any]: @@ -278,3 +512,116 @@ def _format_row(row: dict[str, Any]) -> str: continue items.append(f"{k}={v}") return ", ".join(items) + + +# --------------------------------------------------------------------------- +# Report file parsing +# --------------------------------------------------------------------------- +def parse_uploaded_report(content: str, file_name: str) -> str: + """Parse an uploaded report file and return text suitable for LLM analysis. 
+ + Supported formats: + - AWR HTML report (Oracle) + - AWR text report (Oracle) + - pg_stat_statements CSV export + - pgProfile text/HTML report + - Plain text report + """ + lower_name = file_name.lower() + + if lower_name.endswith(".csv"): + return _parse_csv_report(content, file_name) + if lower_name.endswith((".html", ".htm")): + return _parse_html_report(content, file_name) + return _parse_text_report(content, file_name) + + +def _parse_csv_report(content: str, file_name: str) -> str: + """Parse a CSV file (e.g. pg_stat_statements export).""" + parts = [f"UPLOADED REPORT: {file_name}\n{'=' * 60}\n"] + parts.append("Format: CSV (likely pg_stat_statements or similar export)\n") + + reader = csv.DictReader(io.StringIO(content)) + rows = list(reader) + if not rows: + parts.append("(empty CSV)") + return "\n".join(parts) + + parts.append(f"Columns: {', '.join(rows[0].keys())}") + parts.append(f"Total rows: {len(rows)}\n") + + for i, row in enumerate(rows[:30]): + parts.append(f" [{i + 1}] {_format_row(row)}") + if len(rows) > 30: + parts.append(f" ... 
and {len(rows) - 30} more rows") + + return "\n".join(parts) + + +def _parse_html_report(content: str, file_name: str) -> str: + """Parse an HTML report (AWR or pgProfile) by extracting text content.""" + parts = [f"UPLOADED REPORT: {file_name}\n{'=' * 60}\n"] + + if ( + "AWR" in content[:2000].upper() + or "WORKLOAD REPOSITORY" in content[:2000].upper() + ): + parts.append("Format: Oracle AWR HTML Report\n") + elif ( + "pgprofile" in content[:2000].lower() or "pg_profile" in content[:2000].lower() + ): + parts.append("Format: pgProfile HTML Report\n") + else: + parts.append("Format: HTML Report\n") + + # Strip HTML tags to get text content + text = re.sub( + r"]*>.*?", "", content, flags=re.DOTALL | re.IGNORECASE + ) + text = re.sub( + r"]*>.*?", "", text, flags=re.DOTALL | re.IGNORECASE + ) + text = re.sub(r"<[^>]+>", " ", text) + text = re.sub(r" ", " ", text) + text = re.sub(r"<", "<", text) + text = re.sub(r">", ">", text) + text = re.sub(r"&", "&", text) + text = re.sub(r"\s+", " ", text).strip() + + # Truncate to a reasonable size for LLM context + max_chars = 15000 + if len(text) > max_chars: + parts.append(text[:max_chars]) + parts.append(f"\n... 
(truncated, {len(text)} total characters)") + else: + parts.append(text) + + return "\n".join(parts) + + +def _parse_text_report(content: str, file_name: str) -> str: + """Parse a plain text report (AWR text, pgProfile text, etc.).""" + parts = [f"UPLOADED REPORT: {file_name}\n{'=' * 60}\n"] + + if ( + "AWR" in content[:2000].upper() + or "WORKLOAD REPOSITORY" in content[:2000].upper() + ): + parts.append("Format: Oracle AWR Text Report\n") + elif ( + "pgprofile" in content[:2000].lower() or "pg_profile" in content[:2000].lower() + ): + parts.append("Format: pgProfile Text Report\n") + elif "pg_stat_statements" in content[:2000].lower(): + parts.append("Format: pg_stat_statements Report\n") + else: + parts.append("Format: Text Report\n") + + max_chars = 15000 + if len(content) > max_chars: + parts.append(content[:max_chars]) + parts.append(f"\n... (truncated, {len(content)} total characters)") + else: + parts.append(content) + + return "\n".join(parts) From 8bf5c9e461c7e97f7ffece8ff785399a3b1745b1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 07:31:58 +0000 Subject: [PATCH 08/19] Fix Oracle uppercase column names causing KeyError on snap_id Oracle's oracledb driver returns column names in UPPERCASE by default. Normalize to lowercase in OracleClient.execute_query() so all downstream code (AWR snap selector, auto_analyse, etc.) can use lowercase keys consistently. 
--- tools/pg-assistant/db_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pg-assistant/db_client.py b/tools/pg-assistant/db_client.py index a110b7e..385ce9e 100644 --- a/tools/pg-assistant/db_client.py +++ b/tools/pg-assistant/db_client.py @@ -282,7 +282,7 @@ def execute_query(self, sql: str) -> dict[str, Any]: with self._conn.cursor() as cur: cur.execute(sql) if cur.description: - columns = [desc[0] for desc in cur.description] + columns = [desc[0].lower() for desc in cur.description] raw_rows = cur.fetchall() elapsed = time.monotonic() - start rows = [dict(zip(columns, r)) for r in raw_rows] From 1f009b2879f45c73ffb01b9e36b1e9f4ef650554 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 07:50:28 +0000 Subject: [PATCH 09/19] Enhance analysis with SQL-ID-specific recommendations and rich metadata - Oracle: collect top CPU SQL (v$sql by cpu_time), full table scans (v$sql_plan TABLE ACCESS FULL), existing indexes (all_indexes + all_ind_columns with LISTAGG), stale stats (all_tab_statistics), and execution plans (v$sql_plan detail for top 5 sql_ids) - PostgreSQL: collect top CPU queries (pg_stat_statements with blk_read_time/temp_blks), seq scan tables (pg_stat_user_tables with avg rows per scan), existing indexes (pg_indexes with DDL), stale stats/vacuum (dead tuples, last_analyze), lock waits (pg_stat_activity) - Rewrote LLM system prompt to require SQL-ID-specific analysis: high-CPU SQL with exact sql_id/queryid, full table scan tables with causing sql_id, missing index CREATE statements referencing the queryid that benefits, stale stats with ANALYZE/DBMS_STATS commands, unused index DROP statements, and numbered action plan with exact SQL commands and expected improvement --- tools/pg-assistant/auto_analyse.py | 285 +++++++++++++++++++++++++++-- 1 file changed, 273 insertions(+), 12 deletions(-) diff --git a/tools/pg-assistant/auto_analyse.py 
b/tools/pg-assistant/auto_analyse.py index 3411e25..c266d60 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -87,6 +87,97 @@ ) WHERE ROWNUM <= 20 """ +_ORA_FULL_TABLE_SCANS = """ + SELECT * FROM ( + SELECT + p.sql_id, + p.plan_hash_value, + p.object_owner, + p.object_name AS table_name, + p.operation || ' ' || NVL(p.options, '') AS operation, + s.executions, + ROUND(s.elapsed_time / 1e6, 2) AS elapsed_sec, + s.buffer_gets, + s.disk_reads, + SUBSTR(s.sql_text, 1, 200) AS sql_text + FROM v$sql_plan p + JOIN v$sql s ON p.sql_id = s.sql_id + AND p.child_number = s.child_number + WHERE p.operation = 'TABLE ACCESS' + AND p.options = 'FULL' + AND p.object_owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN') + ORDER BY s.elapsed_time DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_TOP_CPU_SQL = """ + SELECT * FROM ( + SELECT + sql_id, + plan_hash_value, + ROUND(cpu_time / 1e6, 2) AS cpu_sec, + ROUND(elapsed_time / 1e6, 2) AS elapsed_sec, + executions, + buffer_gets, + ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec, + SUBSTR(sql_text, 1, 200) AS sql_text + FROM v$sql + WHERE cpu_time > 0 + ORDER BY cpu_time DESC + ) WHERE ROWNUM <= 15 +""" + +_ORA_EXISTING_INDEXES = """ + SELECT + i.table_name, + i.index_name, + i.index_type, + i.uniqueness, + i.status, + i.num_rows AS index_rows, + i.last_analyzed, + LISTAGG(c.column_name, ', ') WITHIN GROUP (ORDER BY c.column_position) AS columns + FROM all_indexes i + JOIN all_ind_columns c ON i.index_name = c.index_name AND i.owner = c.index_owner + WHERE i.owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'XDB', 'WMSYS') + AND i.table_owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'XDB', 'WMSYS') + GROUP BY i.table_name, i.index_name, i.index_type, i.uniqueness, + i.status, i.num_rows, i.last_analyzed + ORDER BY i.table_name, i.index_name +""" + +_ORA_STALE_STATS = """ + SELECT + table_name, + num_rows, + TO_CHAR(last_analyzed, 'YYYY-MM-DD HH24:MI') AS last_analyzed, + stale_stats, + 
ROUND((SYSDATE - last_analyzed), 1) AS days_since_analyzed + FROM all_tab_statistics + WHERE owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'XDB', 'WMSYS') + AND (stale_stats = 'YES' OR last_analyzed IS NULL + OR last_analyzed < SYSDATE - 7) + ORDER BY CASE WHEN last_analyzed IS NULL THEN 0 ELSE 1 END, + last_analyzed +""" + +_ORA_SQL_PLAN_DETAIL = """ + SELECT + sql_id, + plan_hash_value, + id AS step_id, + LPAD(' ', 2 * depth) || operation || ' ' || NVL(options, '') AS operation, + object_name, + ROUND(cost) AS cost, + cardinality AS est_rows, + bytes AS est_bytes, + access_predicates, + filter_predicates + FROM v$sql_plan + WHERE sql_id = '{sql_id}' + ORDER BY child_number, id +""" + # --------------------------------------------------------------------------- # Oracle AWR snapshot queries # --------------------------------------------------------------------------- @@ -277,14 +368,132 @@ LIMIT 20 """ +_PG_SEQ_SCAN_TABLES = """ + SELECT + schemaname, relname, + seq_scan, + seq_tup_read, + COALESCE(idx_scan, 0) AS idx_scan, + n_live_tup, + CASE WHEN seq_scan > 0 AND n_live_tup > 0 + THEN ROUND(seq_tup_read::numeric / GREATEST(seq_scan, 1)) + ELSE 0 + END AS avg_rows_per_seq_scan, + pg_relation_size(relid) / 1048576 AS table_size_mb + FROM pg_stat_user_tables + WHERE seq_scan > 0 + AND n_live_tup > 1000 + ORDER BY seq_tup_read DESC + LIMIT 20 +""" + +_PG_EXISTING_INDEXES = """ + SELECT + schemaname, tablename, indexname, + indexdef + FROM pg_indexes + WHERE schemaname NOT IN ('pg_catalog', 'information_schema') + ORDER BY tablename, indexname +""" + +_PG_STALE_STATS = """ + SELECT + schemaname, relname, + n_live_tup, + n_dead_tup, + CASE WHEN n_live_tup > 0 + THEN ROUND(n_dead_tup::numeric / n_live_tup * 100, 2) + ELSE 0 + END AS dead_pct, + last_vacuum::text, + last_autovacuum::text, + last_analyze::text, + last_autoanalyze::text + FROM pg_stat_user_tables + WHERE n_dead_tup > 1000 + OR last_analyze IS NULL + OR last_analyze < now() - interval '7 days' + ORDER BY
n_dead_tup DESC + LIMIT 30 +""" + +_PG_TOP_CPU_QUERIES = """ + SELECT + queryid, + LEFT(query, 300) AS query_text, + calls, + ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, + ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec, + rows, + shared_blks_hit, + shared_blks_read, + CASE WHEN shared_blks_hit + shared_blks_read > 0 + THEN ROUND( + shared_blks_hit::numeric + / (shared_blks_hit + shared_blks_read) * 100, 2 + ) + ELSE 100 + END AS cache_hit_pct, + ROUND((blk_read_time / 1000)::numeric, 2) AS blk_read_sec, + ROUND((blk_write_time / 1000)::numeric, 2) AS blk_write_sec, + temp_blks_read, + temp_blks_written + FROM pg_stat_statements + ORDER BY total_exec_time DESC + LIMIT 15 +""" + +_PG_LOCK_WAITS = """ + SELECT + pid, + usename, + LEFT(query, 200) AS query, + wait_event_type, + wait_event, + state, + ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 2) AS running_sec + FROM pg_stat_activity + WHERE state != 'idle' + AND wait_event IS NOT NULL + ORDER BY query_start + LIMIT 20 +""" + ANALYSIS_SYSTEM_PROMPT = ( - "You are a senior database performance engineer. " - "Analyze the following database performance data and provide:\n" - "1. **Executive Summary** (2-3 sentences)\n" - "2. **Key Findings** (bullet list of important observations)\n" - "3. **Top Issues** (ranked by severity)\n" - "4. **Action Plan** (prioritized recommendations with specific SQL or steps)\n\n" - "Be concise and actionable. Use markdown formatting." + "You are a senior DBA and database performance engineer performing a deep-dive " + "analysis. 
You have been given detailed performance data including SQL IDs/query IDs, " + "execution plans, full table scans, existing indexes, and stats freshness.\n\n" + "Produce the following sections:\n\n" + "## Executive Summary\n" + "2-3 sentences summarising the overall database health and biggest concern.\n\n" + "## High-CPU / Long-Running SQL\n" + "For EACH problematic SQL (reference the sql_id or queryid):\n" + "- Quote the sql_id / queryid and a snippet of the SQL text\n" + "- Explain WHY it is slow (full table scan, missing index, bad stats, etc.)\n" + "- Provide the EXACT fix SQL (CREATE INDEX, ANALYZE, rewrite, etc.)\n\n" + "## Full Table Scans\n" + "List every table being full-scanned with the sql_id causing it.\n" + "- For each, check the existing indexes section — if an index already exists " + "that should have been used, suggest gathering fresh stats or checking predicates.\n" + "- If no suitable index exists, provide the exact CREATE INDEX statement.\n\n" + "## Missing / Recommended Indexes\n" + "Based on the query patterns (WHERE, JOIN, ORDER BY columns visible in SQL text), " + "suggest specific CREATE INDEX statements. Reference the sql_id/queryid that " + "would benefit.\n\n" + "## Stale Statistics / Vacuum Issues\n" + "List tables with stale or missing stats. Provide exact ANALYZE / DBMS_STATS " + "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM.\n\n" + "## Unused Indexes\n" + "List indexes that have never been scanned and recommend dropping them " + "(provide DROP INDEX statements).\n\n" + "## Action Plan (Priority Order)\n" + "Numbered list of actions sorted by impact. Each action must include:\n" + "- The specific sql_id / queryid / table affected\n" + "- The exact SQL command to execute\n" + "- Expected improvement\n\n" + "IMPORTANT: Be SPECIFIC — always reference sql_id, queryid, or table name. " + "Never give generic advice like 'add indexes where needed'. " + "Use markdown formatting with code blocks for SQL." 
) @@ -397,7 +606,11 @@ def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]: def _collect_oracle(self) -> dict[str, Any]: sections: dict[str, Any] = {} queries = { - "top_sql": _ORA_TOP_SQL, + "top_cpu_sql": _ORA_TOP_CPU_SQL, + "top_elapsed_sql": _ORA_TOP_SQL, + "full_table_scans": _ORA_FULL_TABLE_SCANS, + "existing_indexes": _ORA_EXISTING_INDEXES, + "stale_statistics": _ORA_STALE_STATS, "wait_events": _ORA_WAIT_EVENTS, "system_stats": _ORA_SYS_STATS, "sga_info": _ORA_SGA, @@ -409,9 +622,37 @@ def _collect_oracle(self) -> dict[str, Any]: sections[name] = {"error": result["error"]} else: sections[name] = result.get("rows", []) + + # Collect execution plans for top 5 SQL IDs + top_sql_ids = self._extract_oracle_sql_ids(sections) + plans: list[dict[str, Any]] = [] + for sql_id in top_sql_ids[:5]: + plan_sql = _ORA_SQL_PLAN_DETAIL.format(sql_id=sql_id) + result = self.db_client.execute_query(plan_sql) + if "error" not in result: + rows = result.get("rows", []) + if rows: + plans.append({"sql_id": sql_id, "steps": rows}) + if plans: + sections["execution_plans"] = plans + sections["db_type"] = DB_TYPE_ORACLE return sections + def _extract_oracle_sql_ids(self, sections: dict[str, Any]) -> list[str]: + """Return unique sql_ids from the top-SQL sections in first-seen order.""" + seen: set[str] = set() + ids: list[str] = [] + for key in ("top_cpu_sql", "top_elapsed_sql", "full_table_scans"): + data = sections.get(key, []) + if isinstance(data, list): + for row in data: + sid = row.get("sql_id", "") + if sid and sid not in seen: + seen.add(sid) + ids.append(sid) + return ids + def _collect_oracle_awr(self, begin_snap: int, end_snap: int) -> dict[str, Any]: """Collect AWR historical data between two snap IDs.""" sections: dict[str, Any] = {} @@ -462,11 +703,16 @@ def _collect_pgprofile(self, begin_sample: int, end_sample: int) -> dict[str, An def _collect_postgresql(self) -> dict[str, Any]: sections: dict[str, Any] = {} queries = { + "top_cpu_queries":
_PG_TOP_CPU_QUERIES, "top_queries": _PG_TOP_QUERIES, + "seq_scan_tables": _PG_SEQ_SCAN_TABLES, + "existing_indexes": _PG_EXISTING_INDEXES, + "stale_stats_vacuum": _PG_STALE_STATS, "table_stats": _PG_TABLE_STATS, "database_stats": _PG_DB_STATS, "bgwriter_stats": _PG_BGWRITER, "unused_indexes": _PG_UNUSED_INDEXES, + "lock_waits": _PG_LOCK_WAITS, } for name, sql in queries.items(): result = self.db_client.execute_query(sql) @@ -485,19 +731,34 @@ def _format_report(self, data: dict[str, Any]) -> str: parts = [f"DATABASE PERFORMANCE REPORT ({db_type.upper()})\n{'=' * 60}\n"] for section_name, section_data in data.items(): - if section_name == "db_type": + if section_name in ("db_type", "snap_range", "sample_range"): continue parts.append(f"\n--- {section_name.upper().replace('_', ' ')} ---") if isinstance(section_data, dict) and "error" in section_data: parts.append(f" ERROR: {section_data['error']}") + elif section_name == "execution_plans" and isinstance(section_data, list): + for plan in section_data: + parts.append(f"\n PLAN FOR sql_id={plan.get('sql_id', '?')}:") + for step in plan.get("steps", [])[:20]: + parts.append(f" {_format_row(step)}") elif isinstance(section_data, list): if not section_data: parts.append(" (no data)") else: - for i, row in enumerate(section_data[:15]): + limit = ( + 25 + if section_name + in ( + "existing_indexes", + "stale_statistics", + "stale_stats_vacuum", + ) + else 15 + ) + for i, row in enumerate(section_data[:limit]): parts.append(f" [{i + 1}] {_format_row(row)}") - if len(section_data) > 15: - parts.append(f" ... and {len(section_data) - 15} more rows") + if len(section_data) > limit: + parts.append(f" ... 
and {len(section_data) - limit} more rows") else: parts.append(f" {section_data}") From 1aa59c1ad1faaf07a75912cf8ccb9b889c9c6528 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 08:30:33 +0000 Subject: [PATCH 10/19] Add Session/Lock Monitor and SQL Tuning Advisor tabs Session/Lock Monitor (session_monitor.py): - Active sessions view (v$session / pg_stat_activity) - Blocking lock tree with recursive hierarchy (CONNECT BY for Oracle, recursive CTE for PostgreSQL) - Lock details (v$lock / pg_locks with object names) - Long-running queries (>5s threshold) - Wait event chains - Kill/cancel session UI (ALTER SYSTEM KILL SESSION for Oracle, pg_cancel_backend/pg_terminate_backend for PostgreSQL) SQL Tuning Advisor (sql_tuning_advisor.py): - Paste any SQL, runs EXPLAIN PLAN (Oracle) or EXPLAIN (PostgreSQL) - Extracts tables from plan, collects per-table metadata: column stats, existing indexes, table stats, clustering factor - PostgreSQL: optional EXPLAIN ANALYZE with actual execution stats - LLM prompt requires step-by-step plan analysis, root cause, specific CREATE INDEX statements, SQL rewrite suggestions, stats maintenance commands, and numbered action plan Updated app.py with two new tabs in the UI. 
--- tools/pg-assistant/app.py | 211 ++++++++++- tools/pg-assistant/session_monitor.py | 321 +++++++++++++++++ tools/pg-assistant/sql_tuning_advisor.py | 440 +++++++++++++++++++++++ 3 files changed, 970 insertions(+), 2 deletions(-) create mode 100644 tools/pg-assistant/session_monitor.py create mode 100644 tools/pg-assistant/sql_tuning_advisor.py diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index 7073f9d..93429e8 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -20,7 +20,9 @@ ) from llm_client import LLMClient from profile_manager import ProfileManager +from session_monitor import SessionMonitor from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError +from sql_tuning_advisor import SQLTuningAdvisor # --------------------------------------------------------------------------- # Page config @@ -261,8 +263,24 @@ def _connected_db_type() -> str: # --------------------------------------------------------------------------- # Tabs # --------------------------------------------------------------------------- -tab_query, tab_schema, tab_monitor, tab_analyse, tab_history = st.tabs( - ["💬 Query", "📋 Schema", "📡 Auto Monitor", "📊 Auto Analyse", "📜 History"] +( + tab_query, + tab_schema, + tab_monitor, + tab_analyse, + tab_sessions, + tab_tuning, + tab_history, +) = st.tabs( + [ + "💬 Query", + "📋 Schema", + "📡 Auto Monitor", + "📊 Auto Analyse", + "🔒 Sessions & Locks", + "🔧 SQL Tuning Advisor", + "📜 History", + ] ) # ---- Query tab ------------------------------------------------------------ @@ -843,6 +861,195 @@ def _connected_db_type() -> str: with st.expander("📄 Parsed Report Text"): st.text(last["report_text"][:5000]) +# ---- Sessions & Locks tab ------------------------------------------------- +with tab_sessions: + st.subheader("🔒 Session & Lock Monitor") + + if not (st.session_state.db_client and st.session_state.db_client.is_connected): + st.warning("Connect to a database first.") + else: + db_client = 
st.session_state.db_client + monitor = SessionMonitor(db_client) + is_oracle = db_client.db_type == DB_TYPE_ORACLE + + sess_view = st.radio( + "View", + [ + "Active Sessions", + "Blocking Lock Tree", + "Lock Details", + "Long-Running Queries", + "Wait Events", + ], + horizontal=True, + key="sess_view", + ) + + if st.button("🔄 Refresh", key="sess_refresh"): + st.session_state["_sess_data"] = None + + # Fetch data based on selected view + with st.spinner("Querying sessions..."): + if sess_view == "Active Sessions": + result = monitor.get_active_sessions() + elif sess_view == "Blocking Lock Tree": + result = monitor.get_blocking_tree() + elif sess_view == "Lock Details": + result = monitor.get_lock_details() + elif sess_view == "Long-Running Queries": + result = monitor.get_long_running() + else: + result = monitor.get_wait_events() + + if "error" in result: + st.error(result["error"]) + else: + rows = result.get("rows", []) + if rows: + st.caption(f"{len(rows)} row(s)") + st.dataframe( + pd.DataFrame(rows), + use_container_width=True, + hide_index=True, + ) + + # Kill session UI + st.divider() + st.subheader("Kill / Cancel Session") + st.warning( + "Use with caution. This will terminate the selected session." 
+ ) + kcol1, kcol2, kcol3 = st.columns([2, 2, 2]) + + if is_oracle: + with kcol1: + kill_sid = st.number_input( + "SID", min_value=1, step=1, key="kill_sid" + ) + with kcol2: + kill_serial = st.number_input( + "Serial#", min_value=1, step=1, key="kill_serial" + ) + with kcol3: + if st.button( + "⚠️ Kill Session (Oracle)", + type="primary", + key="kill_ora", + ): + kill_result = monitor.kill_session(kill_sid, kill_serial) + if kill_result.get("success"): + st.success(f"Session {kill_sid},{kill_serial} killed.") + else: + st.error(kill_result.get("error", "Kill failed")) + else: + with kcol1: + kill_pid = st.number_input( + "PID", min_value=1, step=1, key="kill_pid" + ) + with kcol2: + kill_force = st.checkbox( + "Force terminate (pg_terminate_backend)", + key="kill_force", + ) + with kcol3: + label = "⚠️ Terminate Backend" if kill_force else "Cancel Query" + if st.button(label, type="primary", key="kill_pg"): + kill_result = monitor.kill_session( + kill_pid, force=kill_force + ) + if "error" in kill_result: + st.error(kill_result["error"]) + else: + st.success( + f"PID {kill_pid} " + f"{'terminated' if kill_force else 'cancel sent'}." + ) + else: + st.info("No sessions/locks found for this view.") + +# ---- SQL Tuning Advisor tab ----------------------------------------------- +with tab_tuning: + st.subheader("🔧 SQL Tuning Advisor") + st.markdown( + "Paste a SQL statement to get its **execution plan**, table metadata, " + "and **LLM-powered tuning recommendations** (index suggestions, " + "SQL rewrites, stats maintenance)." 
+ ) + + if not (st.session_state.db_client and st.session_state.db_client.is_connected): + st.warning("Connect to a database first.") + elif not st.session_state.llm_client: + st.warning("Configure Ollama settings and connect first.") + else: + db_client = st.session_state.db_client + llm_client = st.session_state.llm_client + is_oracle = db_client.db_type == DB_TYPE_ORACLE + + tune_sql = st.text_area( + "SQL to tune", + height=200, + placeholder=( + "SELECT o.order_id, c.customer_name, p.product_name\n" + "FROM orders o\n" + "JOIN customers c ON o.customer_id = c.id\n" + "JOIN products p ON o.product_id = p.id\n" + "WHERE o.order_date > '2024-01-01'\n" + "ORDER BY o.order_date DESC" + ), + key="tune_sql_input", + ) + + tcol1, tcol2 = st.columns(2) + with tcol1: + if not is_oracle: + run_analyze = st.checkbox( + "Use EXPLAIN ANALYZE (executes the query — use with caution)", + key="tune_analyze", + ) + else: + run_analyze = False + + with tcol2: + tune_btn = st.button( + "🔧 Analyse & Tune", + use_container_width=True, + type="primary", + key="tune_btn", + ) + + if tune_btn and tune_sql.strip(): + advisor = SQLTuningAdvisor(db_client=db_client, llm_client=llm_client) + with st.spinner( + "Running EXPLAIN, collecting metadata, analysing with LLM..." 
+ ): + result = advisor.analyse_sql(tune_sql.strip(), run_analyze=run_analyze) + + if result.get("error"): + st.error(result["error"]) + else: + # Show execution plan + plan_text = result.get("plan_text", "") + if plan_text: + st.subheader("Execution Plan") + st.code(plan_text, language="text") + + # Show LLM analysis + analysis = result.get("analysis", "") + if analysis: + st.divider() + st.subheader("AI Tuning Recommendations") + st.markdown(analysis) + + # Show raw metadata in expander + metadata = result.get("metadata", {}) + table_meta = metadata.get("table_metadata", "") + if table_meta: + with st.expander("📋 Table Metadata (columns, indexes, stats)"): + st.text(table_meta[:8000]) + + elif tune_btn: + st.warning("Please enter a SQL statement to tune.") + # ---- History tab ---------------------------------------------------------- with tab_history: st.subheader("Query History") diff --git a/tools/pg-assistant/session_monitor.py b/tools/pg-assistant/session_monitor.py new file mode 100644 index 0000000..f025b63 --- /dev/null +++ b/tools/pg-assistant/session_monitor.py @@ -0,0 +1,321 @@ +"""Session and lock monitoring for Oracle and PostgreSQL. 
+ +Provides live views of: +- Active sessions and their current SQL +- Blocking lock trees (who is blocking whom) +- Wait chains +- Long-running queries +""" + +import logging +from typing import Any + +from db_client import BaseDBClient, DB_TYPE_ORACLE + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Oracle session / lock queries +# --------------------------------------------------------------------------- +_ORA_ACTIVE_SESSIONS = """ + SELECT + s.sid, + s.serial# AS serial_num, + s.username, + s.status, + s.osuser, + s.machine, + s.program, + s.wait_class, + s.event, + s.seconds_in_wait, + s.sql_id, + SUBSTR(q.sql_text, 1, 200) AS sql_text, + s.blocking_session, + s.blocking_session_status + FROM v$session s + LEFT JOIN v$sql q ON s.sql_id = q.sql_id AND s.sql_child_number = q.child_number + WHERE s.type = 'USER' + AND s.status = 'ACTIVE' + ORDER BY s.seconds_in_wait DESC +""" + +_ORA_BLOCKING_TREE = """ + SELECT + LPAD(' ', 2 * (LEVEL - 1)) || s.sid || ',' || s.serial# AS session_id, + s.username, + s.status, + s.sql_id, + SUBSTR(q.sql_text, 1, 200) AS sql_text, + s.event, + s.seconds_in_wait, + s.blocking_session, + l.type AS lock_type, + DECODE(l.lmode, + 0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X', + 4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.lmode) AS lock_mode, + DECODE(l.request, + 0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X', + 4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.request) AS lock_request + FROM v$session s + LEFT JOIN v$sql q ON s.sql_id = q.sql_id AND s.sql_child_number = q.child_number + LEFT JOIN v$lock l ON s.sid = l.sid AND l.block > 0 + START WITH s.blocking_session IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM v$session s2 + WHERE s2.sid = s.blocking_session + AND s2.blocking_session IS NOT NULL + ) + CONNECT BY PRIOR s.sid = s.blocking_session + ORDER SIBLINGS BY s.seconds_in_wait DESC +""" + +_ORA_LOCK_DETAILS = """ + SELECT + l.sid, + s.serial# AS serial_num, + 
s.username, + l.type AS lock_type, + DECODE(l.lmode, + 0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X', + 4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.lmode) AS lock_mode, + DECODE(l.request, + 0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X', + 4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.request) AS lock_request, + l.block, + o.object_name, + o.object_type, + s.sql_id, + SUBSTR(q.sql_text, 1, 200) AS sql_text + FROM v$lock l + JOIN v$session s ON l.sid = s.sid + LEFT JOIN dba_objects o ON l.id1 = o.object_id + LEFT JOIN v$sql q ON s.sql_id = q.sql_id AND s.sql_child_number = q.child_number + WHERE l.type NOT IN ('AE', 'PS') + AND (l.block > 0 OR l.request > 0) + ORDER BY l.block DESC, l.request DESC +""" + +_ORA_LONG_RUNNING = """ + SELECT * FROM ( + SELECT + s.sid, + s.serial# AS serial_num, + s.username, + s.sql_id, + SUBSTR(q.sql_text, 1, 200) AS sql_text, + ROUND(s.last_call_et) AS running_sec, + s.event, + s.wait_class, + s.program, + s.machine + FROM v$session s + LEFT JOIN v$sql q ON s.sql_id = q.sql_id + AND s.sql_child_number = q.child_number + WHERE s.type = 'USER' + AND s.status = 'ACTIVE' + AND s.last_call_et > 5 + ORDER BY s.last_call_et DESC + ) WHERE ROWNUM <= 30 +""" + +_ORA_WAIT_CHAINS = """ + SELECT + s.sid, + s.serial# AS serial_num, + s.username, + s.event, + s.wait_class, + s.seconds_in_wait, + s.blocking_session, + s.sql_id + FROM v$session s + WHERE s.type = 'USER' + AND s.wait_class != 'Idle' + AND s.seconds_in_wait > 1 + ORDER BY s.seconds_in_wait DESC +""" + +# --------------------------------------------------------------------------- +# PostgreSQL session / lock queries +# --------------------------------------------------------------------------- +_PG_ACTIVE_SESSIONS = """ + SELECT + pid, + usename, + datname, + client_addr::text, + application_name, + state, + wait_event_type, + wait_event, + LEFT(query, 300) AS query, + ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 1) AS running_sec, + ROUND(EXTRACT(EPOCH FROM (now() - 
backend_start))::numeric, 0) AS session_age_sec + FROM pg_stat_activity + WHERE pid != pg_backend_pid() + AND state != 'idle' + ORDER BY query_start +""" + +_PG_BLOCKING_TREE = """ + WITH RECURSIVE lock_tree AS ( + SELECT + blocked.pid AS blocked_pid, + blocked.usename AS blocked_user, + LEFT(blocked.query, 200) AS blocked_query, + blocked.wait_event_type, + blocked.wait_event, + blocking.pid AS blocking_pid, + blocking.usename AS blocking_user, + LEFT(blocking.query, 200) AS blocking_query, + 1 AS depth + FROM pg_stat_activity blocked + JOIN pg_locks bl ON bl.pid = blocked.pid + JOIN pg_locks kl ON kl.locktype = bl.locktype + AND kl.database IS NOT DISTINCT FROM bl.database + AND kl.relation IS NOT DISTINCT FROM bl.relation + AND kl.page IS NOT DISTINCT FROM bl.page + AND kl.tuple IS NOT DISTINCT FROM bl.tuple + AND kl.virtualxid IS NOT DISTINCT FROM bl.virtualxid + AND kl.transactionid IS NOT DISTINCT FROM bl.transactionid + AND kl.classid IS NOT DISTINCT FROM bl.classid + AND kl.objid IS NOT DISTINCT FROM bl.objid + AND kl.objsubid IS NOT DISTINCT FROM bl.objsubid + AND kl.pid != bl.pid + JOIN pg_stat_activity blocking ON kl.pid = blocking.pid + WHERE NOT bl.granted AND kl.granted + ) + SELECT DISTINCT + blocked_pid, + blocked_user, + blocked_query, + wait_event_type, + wait_event, + blocking_pid, + blocking_user, + blocking_query, + depth + FROM lock_tree + ORDER BY blocking_pid, depth +""" + +_PG_LOCK_DETAILS = """ + SELECT + l.pid, + a.usename, + l.locktype, + l.mode, + l.granted, + l.relation::regclass::text AS locked_relation, + LEFT(a.query, 200) AS query, + a.state, + ROUND(EXTRACT(EPOCH FROM (now() - a.query_start))::numeric, 1) AS query_sec + FROM pg_locks l + JOIN pg_stat_activity a ON l.pid = a.pid + WHERE l.pid != pg_backend_pid() + AND l.relation IS NOT NULL + ORDER BY l.granted, a.query_start +""" + +_PG_LONG_RUNNING = """ + SELECT + pid, + usename, + datname, + LEFT(query, 300) AS query, + state, + wait_event_type, + wait_event, + 
ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 1) AS running_sec, + application_name, + client_addr::text + FROM pg_stat_activity + WHERE pid != pg_backend_pid() + AND state = 'active' + AND query_start < now() - interval '5 seconds' + ORDER BY query_start + LIMIT 30 +""" + +_PG_WAIT_EVENTS = """ + SELECT + pid, + usename, + wait_event_type, + wait_event, + state, + LEFT(query, 200) AS query, + ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 1) AS running_sec + FROM pg_stat_activity + WHERE pid != pg_backend_pid() + AND wait_event IS NOT NULL + AND state != 'idle' + ORDER BY query_start +""" + + +# --------------------------------------------------------------------------- +# Kill session queries +# --------------------------------------------------------------------------- +_ORA_KILL_SESSION = "ALTER SYSTEM KILL SESSION '{sid},{serial_num}' IMMEDIATE" + +_PG_CANCEL_QUERY = "SELECT pg_cancel_backend({pid})" +_PG_TERMINATE_BACKEND = "SELECT pg_terminate_backend({pid})" + + +# --------------------------------------------------------------------------- +# SessionMonitor class +# --------------------------------------------------------------------------- +class SessionMonitor: + """Collects session and lock information from Oracle or PostgreSQL.""" + + def __init__(self, db_client: BaseDBClient) -> None: + self.db_client = db_client + + def get_active_sessions(self) -> dict[str, Any]: + """Return active (non-idle) sessions.""" + if self.db_client.db_type == DB_TYPE_ORACLE: + return self.db_client.execute_query(_ORA_ACTIVE_SESSIONS) + return self.db_client.execute_query(_PG_ACTIVE_SESSIONS) + + def get_blocking_tree(self) -> dict[str, Any]: + """Return blocking lock tree (who blocks whom).""" + if self.db_client.db_type == DB_TYPE_ORACLE: + return self.db_client.execute_query(_ORA_BLOCKING_TREE) + return self.db_client.execute_query(_PG_BLOCKING_TREE) + + def get_lock_details(self) -> dict[str, Any]: + """Return detailed lock information.""" + if 
self.db_client.db_type == DB_TYPE_ORACLE: + return self.db_client.execute_query(_ORA_LOCK_DETAILS) + return self.db_client.execute_query(_PG_LOCK_DETAILS) + + def get_long_running(self) -> dict[str, Any]: + """Return long-running queries (>5 seconds).""" + if self.db_client.db_type == DB_TYPE_ORACLE: + return self.db_client.execute_query(_ORA_LONG_RUNNING) + return self.db_client.execute_query(_PG_LONG_RUNNING) + + def get_wait_events(self) -> dict[str, Any]: + """Return sessions currently waiting.""" + if self.db_client.db_type == DB_TYPE_ORACLE: + return self.db_client.execute_query(_ORA_WAIT_CHAINS) + return self.db_client.execute_query(_PG_WAIT_EVENTS) + + def kill_session( + self, pid_or_sid: int, serial_num: int = 0, force: bool = False + ) -> dict[str, Any]: + """Kill/cancel a session. + + Oracle: ALTER SYSTEM KILL SESSION 'sid,serial#' IMMEDIATE + PostgreSQL: pg_cancel_backend (soft) or pg_terminate_backend (force) + """ + if self.db_client.db_type == DB_TYPE_ORACLE: + sql = _ORA_KILL_SESSION.format(sid=pid_or_sid, serial_num=serial_num) + return self.db_client.execute_statement(sql) + if force: + sql = _PG_TERMINATE_BACKEND.format(pid=pid_or_sid) + else: + sql = _PG_CANCEL_QUERY.format(pid=pid_or_sid) + return self.db_client.execute_query(sql) diff --git a/tools/pg-assistant/sql_tuning_advisor.py b/tools/pg-assistant/sql_tuning_advisor.py new file mode 100644 index 0000000..300ebfb --- /dev/null +++ b/tools/pg-assistant/sql_tuning_advisor.py @@ -0,0 +1,440 @@ +"""SQL Tuning Advisor for Oracle and PostgreSQL. + +Accepts a SQL statement, runs EXPLAIN PLAN, collects relevant metadata +(table DDL, existing indexes, stats), and uses the LLM to generate +specific tuning recommendations. 
+""" + +import logging +from typing import Any + +from db_client import BaseDBClient, DB_TYPE_ORACLE +from llm_client import LLMClient + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Oracle EXPLAIN helpers +# --------------------------------------------------------------------------- +_ORA_EXPLAIN_PLAN = "EXPLAIN PLAN FOR {sql}" + +_ORA_DISPLAY_PLAN = """ + SELECT plan_table_output + FROM TABLE(DBMS_XPLAN.DISPLAY('PLAN_TABLE', NULL, 'ALL')) +""" + +_ORA_TABLE_DDL = """ + SELECT + column_name, + data_type, + data_length, + data_precision, + nullable, + num_distinct, + num_nulls, + density, + histogram + FROM all_tab_columns + WHERE owner = '{owner}' + AND table_name = '{table_name}' + ORDER BY column_id +""" + +_ORA_TABLE_INDEXES = """ + SELECT + i.index_name, + i.index_type, + i.uniqueness, + i.status, + i.num_rows, + i.distinct_keys, + i.clustering_factor, + TO_CHAR(i.last_analyzed, 'YYYY-MM-DD HH24:MI') AS last_analyzed, + LISTAGG(c.column_name, ', ') + WITHIN GROUP (ORDER BY c.column_position) AS columns + FROM all_indexes i + JOIN all_ind_columns c + ON i.index_name = c.index_name AND i.owner = c.index_owner + WHERE i.table_owner = '{owner}' + AND i.table_name = '{table_name}' + GROUP BY i.index_name, i.index_type, i.uniqueness, i.status, + i.num_rows, i.distinct_keys, i.clustering_factor, i.last_analyzed + ORDER BY i.index_name +""" + +_ORA_TABLE_STATS = """ + SELECT + table_name, + num_rows, + blocks, + avg_row_len, + TO_CHAR(last_analyzed, 'YYYY-MM-DD HH24:MI') AS last_analyzed, + stale_stats, + sample_size + FROM all_tab_statistics + WHERE owner = '{owner}' + AND table_name = '{table_name}' +""" + +_ORA_EXTRACT_TABLES = """ + SELECT DISTINCT + p.object_owner AS owner, + p.object_name AS table_name + FROM plan_table p + WHERE p.object_type = 'TABLE' + AND p.object_owner IS NOT NULL +""" + +# --------------------------------------------------------------------------- +#
PostgreSQL EXPLAIN helpers +# --------------------------------------------------------------------------- +_PG_EXPLAIN = "EXPLAIN (ANALYZE false, COSTS true, FORMAT TEXT) {sql}" +_PG_EXPLAIN_ANALYZE = ( + "EXPLAIN (ANALYZE true, COSTS true, BUFFERS true, FORMAT TEXT) {sql}" +) + +_PG_TABLE_COLUMNS = """ + SELECT + column_name, + data_type, + is_nullable, + column_default, + character_maximum_length + FROM information_schema.columns + WHERE table_schema = '{schema}' + AND table_name = '{table_name}' + ORDER BY ordinal_position +""" + +_PG_TABLE_INDEXES = """ + SELECT + indexname, + indexdef + FROM pg_indexes + WHERE schemaname = '{schema}' + AND tablename = '{table_name}' + ORDER BY indexname +""" + +_PG_TABLE_STATS = """ + SELECT + relname, + n_live_tup, + n_dead_tup, + seq_scan, + seq_tup_read, + idx_scan, + idx_tup_fetch, + last_vacuum::text, + last_autovacuum::text, + last_analyze::text, + last_autoanalyze::text + FROM pg_stat_user_tables + WHERE schemaname = '{schema}' + AND relname = '{table_name}' +""" + +_PG_COLUMN_STATS = """ + SELECT + attname AS column_name, + n_distinct, + null_frac, + avg_width, + correlation + FROM pg_stats + WHERE schemaname = '{schema}' + AND tablename = '{table_name}' + ORDER BY attname +""" + +# --------------------------------------------------------------------------- +# LLM prompt +# --------------------------------------------------------------------------- +TUNING_SYSTEM_PROMPT = ( + "You are a senior DBA and SQL tuning expert. You have been given a SQL " + "statement, its execution plan, table structure, existing indexes, and " + "column/table statistics.\n\n" + "Produce the following sections:\n\n" + "## Execution Plan Analysis\n" + "Walk through the plan step by step. 
Identify:\n" + "- Full table scans (and whether they are justified)\n" + "- Nested loop joins vs hash joins (and whether the choice is optimal)\n" + "- Sort operations that could be avoided\n" + "- High-cost steps\n" + "- Estimated vs actual row discrepancies (if ANALYZE data available)\n\n" + "## Root Cause\n" + "Explain WHY the query may be slow. Reference specific plan steps, " + "missing indexes, stale statistics, or suboptimal SQL patterns.\n\n" + "## Recommended Indexes\n" + "For each suggested index:\n" + "- Provide the exact `CREATE INDEX` statement\n" + "- Explain which plan step it improves\n" + "- Note if a composite index is better than multiple single-column indexes\n\n" + "## SQL Rewrite Suggestions\n" + "If the SQL can be rewritten for better performance:\n" + "- Show the rewritten SQL in a code block\n" + "- Explain what changed and why it is faster\n" + "- Consider: subquery elimination, EXISTS vs IN, join reordering, " + "predicate pushdown, avoiding SELECT *\n\n" + "## Statistics & Maintenance\n" + "If statistics are stale or missing, provide exact commands:\n" + "- Oracle: `EXEC DBMS_STATS.GATHER_TABLE_STATS(...)` with proper params\n" + "- PostgreSQL: `ANALYZE table_name;` or `VACUUM ANALYZE table_name;`\n\n" + "## Summary Action Plan\n" + "Numbered list of actions in priority order. Each with:\n" + "- The exact SQL command to run\n" + "- Expected improvement\n\n" + "IMPORTANT: Be SPECIFIC. Reference table names, column names, and index " + "names. Provide copy-paste-ready SQL. Use markdown with code blocks." 
class SQLTuningAdvisor:
    """Analyses a SQL statement and provides tuning recommendations.

    The advisor obtains an execution plan for the statement, gathers column,
    index and statistics metadata for the tables involved, and sends all of it
    to the LLM together with ``TUNING_SYSTEM_PROMPT``.
    """

    # Upper bound on how many tables get metadata lookups per analysis; every
    # table costs several catalog queries and enlarges the LLM prompt.
    MAX_TABLES = 10

    def __init__(
        self,
        db_client: "BaseDBClient",
        llm_client: "LLMClient",
    ) -> None:
        self.db_client = db_client
        self.llm_client = llm_client

    def analyse_sql(self, sql: str, run_analyze: bool = False) -> dict[str, Any]:
        """Run EXPLAIN on the SQL, collect metadata, and get LLM recommendations.

        Args:
            sql: The SQL statement to analyse.
            run_analyze: If True, use EXPLAIN ANALYZE (PostgreSQL) which
                actually executes the query. Use with caution on write queries.
                Not used on the Oracle path.

        Returns:
            A dict with ``plan_text``, ``metadata`` and ``analysis``; when the
            EXPLAIN step fails it additionally carries ``error`` and the other
            values are empty.
        """
        if self.db_client.db_type == DB_TYPE_ORACLE:
            return self._analyse_oracle(sql)
        return self._analyse_postgresql(sql, run_analyze)

    # -- Oracle ---------------------------------------------------------------

    def _analyse_oracle(self, sql: str) -> dict[str, Any]:
        """Analyse *sql* on Oracle via EXPLAIN PLAN and the plan table."""
        # 1. Run EXPLAIN PLAN
        explain_result = self.db_client.execute_statement(
            _ORA_EXPLAIN_PLAN.format(sql=sql)
        )
        if not explain_result.get("success"):
            return self._failure(
                f"EXPLAIN PLAN failed: {explain_result.get('error', '')}"
            )

        # 2. Get the plan output (a failed fetch leaves the plan text empty)
        plan_result = self.db_client.execute_query(_ORA_DISPLAY_PLAN)
        plan_rows = [] if "error" in plan_result else plan_result.get("rows", [])
        plan_text = self._plan_text(plan_rows, "plan_table_output")

        # 3. Extract tables from plan and collect metadata
        tables_result = self.db_client.execute_query(_ORA_EXTRACT_TABLES)
        tables = [] if "error" in tables_result else tables_result.get("rows", [])

        metadata_parts: list[str] = []
        for tbl in tables[: self.MAX_TABLES]:
            owner = tbl.get("owner", "")
            table_name = tbl.get("table_name", "")
            if not owner or not table_name:
                continue
            target = f"{owner}.{table_name}"
            names = {"owner": owner, "table_name": table_name}
            self._collect_metadata(
                metadata_parts,
                [
                    (f"\nTABLE: {target} COLUMNS:", _ORA_TABLE_DDL.format(**names)),
                    (f"\nINDEXES ON {target}:", _ORA_TABLE_INDEXES.format(**names)),
                    (f"\nSTATISTICS FOR {target}:", _ORA_TABLE_STATS.format(**names)),
                ],
            )

        # 4. Build prompt and get LLM analysis
        return self._finish(sql, plan_text, metadata_parts)

    # -- PostgreSQL -----------------------------------------------------------

    def _analyse_postgresql(
        self, sql: str, run_analyze: bool = False
    ) -> dict[str, Any]:
        """Analyse *sql* on PostgreSQL via EXPLAIN (optionally EXPLAIN ANALYZE)."""
        # 1. Run EXPLAIN -- note that the ANALYZE variant executes the statement
        template = _PG_EXPLAIN_ANALYZE if run_analyze else _PG_EXPLAIN
        plan_result = self.db_client.execute_query(template.format(sql=sql))
        if "error" in plan_result:
            return self._failure(f"EXPLAIN failed: {plan_result['error']}")

        # PostgreSQL EXPLAIN returns a single column, so take the first value
        plan_text = self._plan_text(plan_result.get("rows", []))

        # 2. Extract table names from the SQL (simple heuristic)
        tables = self._extract_pg_tables(sql)

        # 3. Collect metadata for each table. The extracted names consist only
        #    of [a-z0-9_], so embedding them in the quoted literals of the
        #    catalog templates cannot break out of the literal.
        metadata_parts: list[str] = []
        for schema, table_name in tables[: self.MAX_TABLES]:
            target = f"{schema}.{table_name}"
            names = {"schema": schema, "table_name": table_name}
            self._collect_metadata(
                metadata_parts,
                [
                    (f"\nTABLE: {target} COLUMNS:", _PG_TABLE_COLUMNS.format(**names)),
                    (f"\nINDEXES ON {target}:", _PG_TABLE_INDEXES.format(**names)),
                    (f"\nTABLE STATS FOR {target}:", _PG_TABLE_STATS.format(**names)),
                    (f"\nCOLUMN STATS FOR {target}:", _PG_COLUMN_STATS.format(**names)),
                ],
            )

        # 4. Build prompt and get LLM analysis
        return self._finish(sql, plan_text, metadata_parts)

    def _extract_pg_tables(self, sql: str) -> list[tuple[str, str]]:
        """Extract table names from SQL using simple keyword parsing.

        Looks at the identifier following each ``FROM`` / ``JOIN`` keyword
        (optionally preceded by ``ONLY``). Names are lower-cased; unqualified
        names are assigned to the ``public`` schema; for names with more than
        two parts only the last two (schema, table) are kept. Function calls in
        FROM (an identifier directly followed by ``(``) are skipped.

        This is a heuristic, not a parser: quoted identifiers are not
        recognised, and ``FROM`` inside expressions such as
        ``EXTRACT(year FROM col)`` can still produce a spurious entry.

        Returns list of (schema, table_name) tuples, de-duplicated, in order of
        first appearance.
        """
        import re  # function-local, as in the original block

        # \b stops identifiers that merely end in "from"/"join" (valid_from)
        # from being mistaken for the keyword.
        name_re = re.compile(
            r"\b(?:FROM|JOIN)\s+(?:ONLY\s+)?([a-zA-Z_][a-zA-Z0-9_.]*)",
            re.IGNORECASE,
        )
        call_re = re.compile(r"\s*\(")
        # Skip subquery aliases and keywords
        skipped = {"select", "where", "lateral", "unnest"}

        tables: list[tuple[str, str]] = []
        seen: set[tuple[str, str]] = set()
        for match in name_re.finditer(sql):
            # identifier( ... ) is a function call, not a relation
            if call_re.match(sql, match.end()):
                continue
            parts = [p for p in match.group(1).lower().split(".") if p]
            if not parts:
                continue
            table = parts[-1]
            if len(parts) == 1:
                if table in skipped:
                    continue
                schema = "public"
            else:
                schema = parts[-2]
            key = (schema, table)
            if key in seen:
                continue
            seen.add(key)
            tables.append(key)

        return tables

    # -- Shared helpers -------------------------------------------------------

    @staticmethod
    def _failure(message: str) -> dict[str, Any]:
        """Build the result returned when the EXPLAIN step fails."""
        return {
            "error": message,
            "plan_text": "",
            "metadata": {},
            "analysis": "",
        }

    @staticmethod
    def _plan_text(rows: list[dict[str, Any]], column: str = "") -> str:
        """Join plan rows into one newline-separated string.

        Args:
            rows: Row dicts returned by the DB client.
            column: Key to read from each row; when empty, the first value of
                each row is used.

        NULL values (and empty rows) become empty lines instead of breaking
        the join or showing up as the text "None".
        """
        lines: list[str] = []
        for row in rows:
            if column:
                value = row.get(column)
            else:
                value = next(iter(row.values()), None) if row else None
            lines.append("" if value is None else str(value))
        return "\n".join(lines)

    def _collect_metadata(
        self, parts: list[str], queries: list[tuple[str, str]]
    ) -> None:
        """Run each (header, sql) pair in order and append formatted rows.

        A query that reports an error or returns no rows contributes nothing,
        not even its header.
        """
        for header, query in queries:
            result = self.db_client.execute_query(query)
            if "error" in result or not result.get("rows"):
                continue
            parts.append(header)
            parts.extend(f" {_fmt_row(row)}" for row in result["rows"])

    def _finish(
        self, sql: str, plan_text: str, metadata_parts: list[str]
    ) -> dict[str, Any]:
        """Assemble the sections, query the LLM and build the success result."""
        sections: dict[str, str] = {
            "execution_plan": plan_text,
            "table_metadata": "\n".join(metadata_parts),
        }
        prompt = self._build_prompt(sql, sections)
        analysis = self._get_llm_analysis(prompt)
        return {
            "plan_text": plan_text,
            "metadata": sections,
            "analysis": analysis,
        }

    def _build_prompt(self, sql: str, sections: dict[str, str]) -> str:
        """Compose the user prompt from the SQL, its plan and table metadata.

        The metadata part is omitted when no metadata was collected.
        """
        parts = [
            f"SQL STATEMENT TO TUNE:\n```sql\n{sql}\n```\n",
            f"\nEXECUTION PLAN:\n```\n{sections.get('execution_plan', '(not available)')}\n```\n",
        ]
        meta = sections.get("table_metadata", "")
        if meta:
            parts.append(f"\nTABLE METADATA (columns, indexes, statistics):\n{meta}\n")

        return "\n".join(parts)

    def _get_llm_analysis(self, prompt: str) -> str:
        """Ask the LLM for the analysis; LLM failures are returned as text."""
        try:
            return self.llm_client.generate(
                prompt=prompt,
                system_prompt=TUNING_SYSTEM_PROMPT,
            )
        except (ConnectionError, RuntimeError) as exc:
            return f"LLM analysis failed: {exc}"


def _fmt_row(row: dict[str, Any]) -> str:
    """Format a row dict into a compact ``key=value, ...`` string.

    Entries whose value is None are left out.
    """
    return ", ".join(f"{k}={v}" for k, v in row.items() if v is not None)
def _render_comparison(result: dict) -> None:
    """Render the snapshot comparison results with charts and delta table.

    Reads the optional keys ``delta_table`` (rows for a DataFrame),
    ``figures`` (dicts with ``title`` and ``fig``) and ``analysis``
    (markdown text) from *result* and renders each one that is present.
    When none of them is present the user is told so instead of being left
    with an empty tab.
    """
    # NOTE(review): other analysers in this tool report failures under an
    # "error" key; presumably the comparator does too -- confirm against
    # SnapshotComparator. Partial results are still rendered below.
    error = result.get("error")
    if error:
        st.error(f"Comparison failed: {error}")

    rendered = False

    # Delta summary table
    delta_table = result.get("delta_table", [])
    if delta_table:
        st.markdown("### Delta Summary")
        df = pd.DataFrame(delta_table)
        st.dataframe(df, use_container_width=True, hide_index=True)
        rendered = True

    # Plotly charts
    figures = result.get("figures", [])
    if figures:
        st.markdown("### Visual Comparison")
        for fig_info in figures:
            title = fig_info.get("title", "")
            fig = fig_info.get("fig")
            if fig is not None:
                st.markdown(f"**{title}**")
                st.plotly_chart(fig, use_container_width=True)
        rendered = True

    # LLM analysis
    analysis = result.get("analysis", "")
    if analysis:
        st.markdown("### AI Comparison Analysis")
        st.markdown(analysis)
        rendered = True

    if not rendered and not error:
        st.info("The comparison returned no data for the selected ranges.")
SnapshotComparator( + st.session_state.db_client, st.session_state.llm_client + ) + + if db_type == DB_TYPE_ORACLE: + st.markdown( + "Select **two AWR snapshot ranges** to compare. " + "The tool will show delta metrics and charts." + ) + # Load available snapshots + analyser_cmp = PerformanceAnalyser( + st.session_state.db_client, st.session_state.llm_client + ) + snap_result = analyser_cmp.list_awr_snapshots() + if "error" in snap_result: + st.error(f"Cannot load snapshots: {snap_result['error']}") + else: + snaps = snap_result.get("rows", []) + if not snaps: + st.info("No AWR snapshots found.") + else: + snap_ids = sorted( + {int(s["snap_id"]) for s in snaps if s.get("snap_id")} + ) + snap_labels = { + int(s["snap_id"]): ( + f"{s['snap_id']} - {s.get('end_interval_time', '')}" + ) + for s in snaps + if s.get("snap_id") + } + + col_a, col_b = st.columns(2) + with col_a: + st.markdown("**Snapshot Range A (Baseline)**") + a_begin = st.selectbox( + "A \u2014 Begin Snap", + snap_ids, + index=0, + key="cmp_a_begin", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + a_end = st.selectbox( + "A \u2014 End Snap", + snap_ids, + index=min(1, len(snap_ids) - 1), + key="cmp_a_end", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + with col_b: + st.markdown("**Snapshot Range B (Current)**") + b_begin = st.selectbox( + "B \u2014 Begin Snap", + snap_ids, + index=max(0, len(snap_ids) - 2), + key="cmp_b_begin", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + b_end = st.selectbox( + "B \u2014 End Snap", + snap_ids, + index=len(snap_ids) - 1, + key="cmp_b_end", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + + if st.button("\U0001f50d Compare Snapshots", key="cmp_ora_btn"): + if a_begin >= a_end: + st.error("Range A: Begin snap must be less than End snap.") + elif b_begin >= b_end: + st.error("Range B: Begin snap must be less than End snap.") + else: + with st.spinner("Comparing snapshots\u2026"): + result = comparator.compare_oracle( + a_begin, 
a_end, b_begin, b_end + ) + _render_comparison(result) + + elif db_type == DB_TYPE_POSTGRESQL: + cmp_mode = st.radio( + "Comparison mode", + ["pgProfile Sample Ranges", "pg_stat_statements (latest)"], + key="cmp_pg_mode", + horizontal=True, + ) + + if cmp_mode == "pgProfile Sample Ranges": + analyser_cmp = PerformanceAnalyser( + st.session_state.db_client, st.session_state.llm_client + ) + samp_result = analyser_cmp.list_pgprofile_samples() + if "error" in samp_result: + st.error(f"Cannot load pgProfile samples: {samp_result['error']}") + else: + samps = samp_result.get("rows", []) + if not samps: + st.info("No pgProfile samples found.") + else: + samp_ids = sorted( + {int(s["sample_id"]) for s in samps if s.get("sample_id")} + ) + samp_labels = { + int(s["sample_id"]): ( + f"{s['sample_id']} - {s.get('sample_time', '')}" + ) + for s in samps + if s.get("sample_id") + } + + col_a, col_b = st.columns(2) + with col_a: + st.markdown("**Sample Range A (Baseline)**") + sa_begin = st.selectbox( + "A \u2014 Begin Sample", + samp_ids, + index=0, + key="cmp_sa_begin", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + sa_end = st.selectbox( + "A \u2014 End Sample", + samp_ids, + index=min(1, len(samp_ids) - 1), + key="cmp_sa_end", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + with col_b: + st.markdown("**Sample Range B (Current)**") + sb_begin = st.selectbox( + "B \u2014 Begin Sample", + samp_ids, + index=max(0, len(samp_ids) - 2), + key="cmp_sb_begin", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + sb_end = st.selectbox( + "B \u2014 End Sample", + samp_ids, + index=len(samp_ids) - 1, + key="cmp_sb_end", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + + if st.button("\U0001f50d Compare Samples", key="cmp_pg_btn"): + if sa_begin >= sa_end: + st.error("Range A: Begin must be less than End.") + elif sb_begin >= sb_end: + st.error("Range B: Begin must be less than End.") + else: + with st.spinner("Comparing samples\u2026"): + result = 
comparator.compare_pgprofile( + sa_begin, sa_end, sb_begin, sb_end + ) + _render_comparison(result) + + else: + st.info( + "pg_stat_statements shows cumulative stats since last " + "reset. For snapshot comparison, use pgProfile sample " + "ranges above.\n\n" + "You can view the current pg_stat_statements data in " + "the **Auto Analyse** tab." + ) + + # ---- History tab ---------------------------------------------------------- with tab_history: st.subheader("Query History") diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index c266d60..7dd305e 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -29,8 +29,13 @@ executions, buffer_gets, disk_reads, - SUBSTR(sql_text, 1, 200) AS sql_text + SUBSTR(sql_fulltext, 1, 500) AS sql_text FROM v$sql + WHERE parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) ORDER BY elapsed_time DESC ) WHERE ROWNUM <= 20 """ @@ -99,13 +104,22 @@ ROUND(s.elapsed_time / 1e6, 2) AS elapsed_sec, s.buffer_gets, s.disk_reads, - SUBSTR(s.sql_text, 1, 200) AS sql_text + SUBSTR(s.sql_fulltext, 1, 500) AS sql_text FROM v$sql_plan p JOIN v$sql s ON p.sql_id = s.sql_id AND p.child_number = s.child_number WHERE p.operation = 'TABLE ACCESS' AND p.options = 'FULL' - AND p.object_owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN') + AND p.object_owner NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + AND s.parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) ORDER BY s.elapsed_time DESC ) WHERE ROWNUM <= 20 """ @@ -120,9 +134,14 @@ executions, buffer_gets, ROUND(buffer_gets / GREATEST(executions, 1)) AS 
gets_per_exec, - SUBSTR(sql_text, 1, 200) AS sql_text + SUBSTR(sql_fulltext, 1, 500) AS sql_text FROM v$sql WHERE cpu_time > 0 + AND parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) ORDER BY cpu_time DESC ) WHERE ROWNUM <= 15 """ @@ -201,12 +220,17 @@ SUM(s.executions_delta) AS executions, SUM(s.buffer_gets_delta) AS buffer_gets, SUM(s.disk_reads_delta) AS disk_reads, - DBMS_LOB.SUBSTR(t.sql_text, 200, 1) AS sql_text + DBMS_LOB.SUBSTR(t.sql_text, 500, 1) AS sql_text FROM dba_hist_sqlstat s JOIN dba_hist_sqltext t ON s.sql_id = t.sql_id AND s.dbid = t.dbid WHERE s.snap_id BETWEEN :begin_snap AND :end_snap + AND s.parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) GROUP BY s.sql_id, s.plan_hash_value, - DBMS_LOB.SUBSTR(t.sql_text, 200, 1) + DBMS_LOB.SUBSTR(t.sql_text, 500, 1) ORDER BY elapsed_sec DESC ) WHERE ROWNUM <= 20 """ @@ -299,7 +323,7 @@ _PG_TOP_QUERIES = """ SELECT queryid, - LEFT(query, 200) AS query_text, + LEFT(query, 500) AS query_text, calls, ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec, @@ -314,6 +338,13 @@ ELSE 100 END AS cache_hit_pct FROM pg_stat_statements + WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) + AND queryid IS NOT NULL + AND query NOT LIKE 'SET %%' + AND query NOT LIKE 'RESET %%' + AND query NOT LIKE 'BEGIN%%' + AND query NOT LIKE 'COMMIT%%' + AND query NOT LIKE 'ROLLBACK%%' ORDER BY total_exec_time DESC LIMIT 20 """ @@ -420,7 +451,7 @@ _PG_TOP_CPU_QUERIES = """ SELECT queryid, - LEFT(query, 300) AS query_text, + LEFT(query, 500) AS query_text, calls, ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, ROUND((mean_exec_time / 1000)::numeric, 4) AS 
mean_exec_sec, @@ -439,6 +470,13 @@ temp_blks_read, temp_blks_written FROM pg_stat_statements + WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) + AND queryid IS NOT NULL + AND query NOT LIKE 'SET %%' + AND query NOT LIKE 'RESET %%' + AND query NOT LIKE 'BEGIN%%' + AND query NOT LIKE 'COMMIT%%' + AND query NOT LIKE 'ROLLBACK%%' ORDER BY total_exec_time DESC LIMIT 15 """ @@ -447,7 +485,7 @@ SELECT pid, usename, - LEFT(query, 200) AS query, + LEFT(query, 500) AS query, wait_event_type, wait_event, state, @@ -459,41 +497,345 @@ LIMIT 20 """ +# --------------------------------------------------------------------------- +# Oracle best-practice checks +# --------------------------------------------------------------------------- +_ORA_ROW_CONTENTION = """ + SELECT * FROM ( + SELECT + event, + total_waits, + ROUND(time_waited / 100, 2) AS time_waited_sec, + ROUND(average_wait / 100, 4) AS avg_wait_sec + FROM v$system_event + WHERE event IN ( + 'enq: TX - row lock contention', + 'enq: TX - index contention', + 'enq: TX - allocate ITL entry', + 'enq: TM - contention', + 'enq: HW - contention', + 'buffer busy waits', + 'gc buffer busy acquire', + 'gc buffer busy release', + 'row cache lock', + 'library cache lock', + 'cursor: pin S wait on X' + ) + ORDER BY time_waited DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_SEQUENCE_NO_CACHE = """ + SELECT + sequence_owner, + sequence_name, + min_value, + max_value, + increment_by, + cache_size, + order_flag, + cycle_flag, + last_number + FROM all_sequences + WHERE sequence_owner NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + AND (cache_size = 0 OR cache_size = 1) + ORDER BY sequence_owner, sequence_name +""" + +_ORA_HIGH_ELAPSED_PER_EXEC = """ + SELECT * FROM ( + SELECT + sql_id, + plan_hash_value, + executions, + ROUND(elapsed_time / GREATEST(executions, 1) / 1e6, 4) + AS avg_elapsed_sec, + 
ROUND(elapsed_time / 1e6, 2) AS total_elapsed_sec, + buffer_gets, + ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec, + SUBSTR(sql_fulltext, 1, 500) AS sql_text + FROM v$sql + WHERE executions > 0 + AND elapsed_time / GREATEST(executions, 1) / 1e6 > 1 + AND parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + ORDER BY avg_elapsed_sec DESC + ) WHERE ROWNUM <= 15 +""" + +_ORA_HIGH_EXEC_COUNT = """ + SELECT * FROM ( + SELECT + sql_id, + plan_hash_value, + executions, + ROUND(elapsed_time / 1e6, 2) AS total_elapsed_sec, + ROUND(cpu_time / 1e6, 2) AS total_cpu_sec, + buffer_gets, + ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec, + SUBSTR(sql_fulltext, 1, 500) AS sql_text + FROM v$sql + WHERE executions > 1000 + AND parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + ORDER BY executions DESC + ) WHERE ROWNUM <= 15 +""" + +_ORA_REDO_LOG_SWITCHES = """ + SELECT * FROM ( + SELECT + TO_CHAR(first_time, 'YYYY-MM-DD HH24') AS switch_hour, + COUNT(*) AS switches + FROM v$log_history + WHERE first_time > SYSDATE - 1 + GROUP BY TO_CHAR(first_time, 'YYYY-MM-DD HH24') + ORDER BY switch_hour DESC + ) WHERE ROWNUM <= 24 +""" + +_ORA_TEMP_USAGE = """ + SELECT + tablespace_name, + ROUND(SUM(bytes_used) / 1048576, 2) AS used_mb, + ROUND(SUM(bytes_free) / 1048576, 2) AS free_mb, + ROUND(SUM(bytes_used) / (SUM(bytes_used) + SUM(bytes_free)) * 100, 2) + AS pct_used + FROM v$temp_space_header + GROUP BY tablespace_name + ORDER BY pct_used DESC +""" + +_ORA_PARALLEL_QUERIES = """ + SELECT * FROM ( + SELECT + sql_id, + users_executing, + px_servers_executions AS px_servers, + ROUND(elapsed_time / 1e6, 2) AS elapsed_sec, + SUBSTR(sql_fulltext, 1, 500) AS sql_text + FROM v$sql + WHERE px_servers_executions 
> 0 + AND parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + ORDER BY px_servers_executions DESC + ) WHERE ROWNUM <= 10 +""" + +# --------------------------------------------------------------------------- +# PostgreSQL best-practice checks +# --------------------------------------------------------------------------- +_PG_HIGH_ELAPSED_PER_EXEC = """ + SELECT + queryid, + LEFT(query, 500) AS query_text, + calls, + ROUND((total_exec_time / calls / 1000)::numeric, 4) AS avg_elapsed_sec, + ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, + rows, + shared_blks_hit, + shared_blks_read, + temp_blks_read, + temp_blks_written + FROM pg_stat_statements + WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) + AND calls > 0 + AND total_exec_time / calls / 1000 > 1 + AND queryid IS NOT NULL + AND query NOT LIKE 'SET %%' + AND query NOT LIKE 'RESET %%' + AND query NOT LIKE 'BEGIN%%' + AND query NOT LIKE 'COMMIT%%' + AND query NOT LIKE 'ROLLBACK%%' + ORDER BY avg_elapsed_sec DESC + LIMIT 15 +""" + +_PG_HIGH_EXEC_COUNT = """ + SELECT + queryid, + LEFT(query, 500) AS query_text, + calls, + ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec, + ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec, + rows, + shared_blks_hit + shared_blks_read AS total_blocks + FROM pg_stat_statements + WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) + AND calls > 1000 + AND queryid IS NOT NULL + AND query NOT LIKE 'SET %%' + AND query NOT LIKE 'RESET %%' + AND query NOT LIKE 'BEGIN%%' + AND query NOT LIKE 'COMMIT%%' + AND query NOT LIKE 'ROLLBACK%%' + ORDER BY calls DESC + LIMIT 15 +""" + +_PG_BLOAT_ESTIMATE = """ + SELECT + schemaname, relname, + n_live_tup, + n_dead_tup, + CASE WHEN n_live_tup > 0 + THEN ROUND(n_dead_tup::numeric / n_live_tup * 100, 2) + ELSE 0 + END AS dead_pct, + 
pg_relation_size(relid) / 1048576 AS table_size_mb, + last_autovacuum::text, + last_autoanalyze::text + FROM pg_stat_user_tables + WHERE n_dead_tup > 10000 + OR (n_live_tup > 0 AND n_dead_tup::numeric / n_live_tup > 0.2) + ORDER BY n_dead_tup DESC + LIMIT 20 +""" + +_PG_SEQUENCE_CACHE = """ + SELECT + schemaname, + sequencename, + start_value, + min_value, + max_value, + increment_by, + cache_size, + cycle + FROM pg_sequences + WHERE schemaname NOT IN ('pg_catalog', 'information_schema') + AND (cache_size IS NULL OR cache_size <= 1) + ORDER BY schemaname, sequencename +""" + +_PG_TEMP_FILE_USAGE = """ + SELECT + queryid, + LEFT(query, 500) AS query_text, + calls, + temp_blks_read, + temp_blks_written, + ROUND((temp_blks_read + temp_blks_written) * 8.0 / 1024, 2) + AS temp_mb, + ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec + FROM pg_stat_statements + WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) + AND (temp_blks_read > 0 OR temp_blks_written > 0) + AND queryid IS NOT NULL + AND query NOT LIKE 'SET %%' + AND query NOT LIKE 'RESET %%' + AND query NOT LIKE 'BEGIN%%' + ORDER BY temp_blks_read + temp_blks_written DESC + LIMIT 15 +""" + +_PG_CONNECTION_STATS = """ + SELECT + state, + COUNT(*) AS count, + COALESCE(wait_event_type, 'None') AS wait_event_type + FROM pg_stat_activity + WHERE backend_type = 'client backend' + GROUP BY state, wait_event_type + ORDER BY count DESC +""" + +_PG_CHECKPOINT_STATS = """ + SELECT + checkpoints_timed, + checkpoints_req, + buffers_checkpoint, + buffers_clean, + buffers_backend, + maxwritten_clean, + ROUND(buffers_backend::numeric / + GREATEST(buffers_checkpoint + buffers_clean + buffers_backend, 1) + * 100, 2) AS backend_write_pct + FROM pg_stat_bgwriter +""" + ANALYSIS_SYSTEM_PROMPT = ( "You are a senior DBA and database performance engineer performing a deep-dive " "analysis. 
You have been given detailed performance data including SQL IDs/query IDs, " - "execution plans, full table scans, existing indexes, and stats freshness.\n\n" + "execution plans, full table scans, existing indexes, stats freshness, row contention " + "events, sequence caching issues, and other best-practice metrics.\n\n" "Produce the following sections:\n\n" "## Executive Summary\n" "2-3 sentences summarising the overall database health and biggest concern.\n\n" - "## High-CPU / Long-Running SQL\n" - "For EACH problematic SQL (reference the sql_id or queryid):\n" - "- Quote the sql_id / queryid and a snippet of the SQL text\n" - "- Explain WHY it is slow (full table scan, missing index, bad stats, etc.)\n" + "## High Elapsed Time SQL\n" + "For EACH SQL with high average elapsed time per execution (reference sql_id/queryid):\n" + "- Quote the sql_id / queryid, avg elapsed, total elapsed, and a snippet\n" + "- Explain WHY it is slow (full table scan, missing index, bad join, bad stats)\n" "- Provide the EXACT fix SQL (CREATE INDEX, ANALYZE, rewrite, etc.)\n\n" + "## High Execution Count SQL\n" + "For SQL executed thousands of times:\n" + "- Even small per-execution cost adds up; flag these with sql_id/queryid\n" + "- Suggest caching, batching, or query consolidation where applicable\n" + "- Provide exact fix SQL if index or rewrite would help\n\n" "## Full Table Scans\n" "List every table being full-scanned with the sql_id causing it.\n" "- For each, check the existing indexes section — if an index already exists " "that should have been used, suggest gathering fresh stats or checking predicates.\n" "- If no suitable index exists, provide the exact CREATE INDEX statement.\n\n" + "## Row Contention & Locking Issues\n" + "Analyse the row contention / enqueue wait events data:\n" + "- Flag 'enq: TX - row lock contention' and similar events with wait times\n" + "- Identify the likely cause (hot blocks, ITL contention, poor sequence caching)\n" + "- Provide fixes: 
increase INITRANS, reduce transaction scope, batch commits\n\n" + "## Sequence Caching Issues\n" + "For sequences with NOCACHE or CACHE 1:\n" + "- Explain the performance impact (row cache lock waits, redo contention)\n" + "- Provide exact ALTER SEQUENCE ... CACHE 20 (or higher) statements\n" + "- Flag ORDER sequences that may need NOORDER for better performance\n\n" "## Missing / Recommended Indexes\n" "Based on the query patterns (WHERE, JOIN, ORDER BY columns visible in SQL text), " "suggest specific CREATE INDEX statements. Reference the sql_id/queryid that " "would benefit.\n\n" - "## Stale Statistics / Vacuum Issues\n" + "## Stale Statistics / Vacuum / Bloat Issues\n" "List tables with stale or missing stats. Provide exact ANALYZE / DBMS_STATS " - "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM.\n\n" + "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM " + "and estimate bloat. For Oracle, flag tables not analysed in 7+ days.\n\n" + "## Temp Space / Sort Issues\n" + "Flag queries spilling to temp (temp_blks_read/written for PG, sorts (disk) for " + "Oracle). Suggest work_mem increase, index to avoid sort, or query rewrite.\n\n" "## Unused Indexes\n" "List indexes that have never been scanned and recommend dropping them " "(provide DROP INDEX statements).\n\n" + "## Checkpoint / Redo / WAL Issues\n" + "For Oracle: flag excessive redo log switches (>6/hour). " + "For PostgreSQL: flag high backend_write_pct (buffers_backend vs checkpoint). " + "Suggest redo log sizing or checkpoint_completion_target tuning.\n\n" "## Action Plan (Priority Order)\n" "Numbered list of actions sorted by impact. Each action must include:\n" - "- The specific sql_id / queryid / table affected\n" + "- The specific sql_id / queryid / table / sequence affected\n" "- The exact SQL command to execute\n" "- Expected improvement\n\n" - "IMPORTANT: Be SPECIFIC — always reference sql_id, queryid, or table name. 
" - "Never give generic advice like 'add indexes where needed'. " - "Use markdown formatting with code blocks for SQL." + "IMPORTANT RULES:\n" + "1. Be SPECIFIC — always reference sql_id, queryid, or table name.\n" + "2. Always QUOTE the full SQL text provided in the data alongside the sql_id/queryid. " + "Show the complete query text so the reader can understand exactly which SQL is problematic.\n" + "3. Never give generic advice like 'add indexes where needed'.\n" + "4. Exclude all system/internal queries — focus only on user application SQL.\n" + "5. Use markdown formatting with code blocks for SQL.\n" + "6. For each problematic SQL, show it in a code block like:\n" + " ```sql\n" + " -- sql_id: ABC123\n" + " SELECT ... (full query text from the data)\n" + " ```\n" + "7. Then explain the issue and provide the fix SQL in another code block." ) @@ -608,13 +950,20 @@ def _collect_oracle(self) -> dict[str, Any]: queries = { "top_cpu_sql": _ORA_TOP_CPU_SQL, "top_elapsed_sql": _ORA_TOP_SQL, + "high_elapsed_per_exec": _ORA_HIGH_ELAPSED_PER_EXEC, + "high_execution_count": _ORA_HIGH_EXEC_COUNT, "full_table_scans": _ORA_FULL_TABLE_SCANS, "existing_indexes": _ORA_EXISTING_INDEXES, "stale_statistics": _ORA_STALE_STATS, + "row_contention": _ORA_ROW_CONTENTION, + "sequence_no_cache": _ORA_SEQUENCE_NO_CACHE, "wait_events": _ORA_WAIT_EVENTS, "system_stats": _ORA_SYS_STATS, "sga_info": _ORA_SGA, "tablespace_io": _ORA_TABLESPACE_IO, + "redo_log_switches": _ORA_REDO_LOG_SWITCHES, + "temp_usage": _ORA_TEMP_USAGE, + "parallel_queries": _ORA_PARALLEL_QUERIES, } for name, sql in queries.items(): result = self.db_client.execute_query(sql) @@ -705,6 +1054,8 @@ def _collect_postgresql(self) -> dict[str, Any]: queries = { "top_cpu_queries": _PG_TOP_CPU_QUERIES, "top_queries": _PG_TOP_QUERIES, + "high_elapsed_per_exec": _PG_HIGH_ELAPSED_PER_EXEC, + "high_execution_count": _PG_HIGH_EXEC_COUNT, "seq_scan_tables": _PG_SEQ_SCAN_TABLES, "existing_indexes": _PG_EXISTING_INDEXES, 
"stale_stats_vacuum": _PG_STALE_STATS, @@ -713,6 +1064,11 @@ def _collect_postgresql(self) -> dict[str, Any]: "bgwriter_stats": _PG_BGWRITER, "unused_indexes": _PG_UNUSED_INDEXES, "lock_waits": _PG_LOCK_WAITS, + "bloat_estimate": _PG_BLOAT_ESTIMATE, + "sequence_cache_issues": _PG_SEQUENCE_CACHE, + "temp_file_usage": _PG_TEMP_FILE_USAGE, + "connection_stats": _PG_CONNECTION_STATS, + "checkpoint_stats": _PG_CHECKPOINT_STATS, } for name, sql in queries.items(): result = self.db_client.execute_query(sql) diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt index 8efff7e..3732284 100644 --- a/tools/pg-assistant/requirements.txt +++ b/tools/pg-assistant/requirements.txt @@ -3,3 +3,4 @@ psycopg2-binary>=2.9.0,<3.0.0 oracledb>=2.0.0,<3.0.0 streamlit>=1.28.0,<2.0.0 pandas>=2.0.0,<3.0.0 +plotly>=5.0.0,<6.0.0 diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py new file mode 100644 index 0000000..518f7fc --- /dev/null +++ b/tools/pg-assistant/snapshot_compare.py @@ -0,0 +1,889 @@ +"""Compare two database snapshots with visual charts. + +Supports Oracle AWR snap-ID ranges and PostgreSQL pgProfile sample-ID ranges. +Produces Plotly figures for side-by-side comparison of key metrics. 
+""" + +import logging +from typing import Any + +import plotly.graph_objects as go +from plotly.subplots import make_subplots + +from db_client import BaseDBClient, DB_TYPE_ORACLE +from llm_client import LLMClient + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Oracle AWR delta queries (parameterised with :begin_snap / :end_snap) +# --------------------------------------------------------------------------- +_ORA_SNAP_TOP_SQL = """ + SELECT * FROM ( + SELECT + s.sql_id, + SUM(s.elapsed_time_delta) / 1e6 AS elapsed_sec, + SUM(s.cpu_time_delta) / 1e6 AS cpu_sec, + SUM(s.executions_delta) AS executions, + SUM(s.buffer_gets_delta) AS buffer_gets, + SUM(s.disk_reads_delta) AS disk_reads, + SUM(s.rows_processed_delta) AS rows_processed + FROM dba_hist_sqlstat s + WHERE s.snap_id BETWEEN {begin_snap} AND {end_snap} + AND s.parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + GROUP BY s.sql_id + ORDER BY elapsed_sec DESC + ) WHERE ROWNUM <= 20 +""" + +_ORA_SNAP_WAIT_EVENTS = """ + SELECT * FROM ( + SELECT + event_name AS event, + wait_class, + SUM(total_waits_fg) AS total_waits, + ROUND(SUM(time_waited_micro_fg) / 1e6, 2) AS time_waited_sec + FROM dba_hist_system_event + WHERE snap_id BETWEEN {begin_snap} AND {end_snap} + AND wait_class != 'Idle' + GROUP BY event_name, wait_class + ORDER BY time_waited_sec DESC + ) WHERE ROWNUM <= 15 +""" + +_ORA_SNAP_SYS_STATS = """ + SELECT + stat_name AS name, + SUM(value) AS value + FROM dba_hist_sysstat + WHERE snap_id BETWEEN {begin_snap} AND {end_snap} + AND stat_name IN ( + 'db block gets', 'consistent gets', 'physical reads', + 'redo size', 'sorts (memory)', 'sorts (disk)', + 'rows processed', 'parse count (total)', 'parse count (hard)', + 'execute count', 'user commits', 'user rollbacks', + 'enqueue waits', 'enqueue 
timeouts' + ) + GROUP BY stat_name + ORDER BY stat_name +""" + +_ORA_SNAP_TOP_ELAPSED = """ + SELECT * FROM ( + SELECT + s.sql_id, + ROUND(SUM(s.elapsed_time_delta) / GREATEST(SUM(s.executions_delta), 1) / 1e6, 4) + AS avg_elapsed_sec, + SUM(s.executions_delta) AS executions, + SUM(s.buffer_gets_delta) AS buffer_gets + FROM dba_hist_sqlstat s + WHERE s.snap_id BETWEEN {begin_snap} AND {end_snap} + AND s.parsing_schema_name NOT IN ( + 'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS', + 'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS', + 'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS' + ) + GROUP BY s.sql_id + HAVING SUM(s.executions_delta) > 0 + ORDER BY avg_elapsed_sec DESC + ) WHERE ROWNUM <= 15 +""" + +# --------------------------------------------------------------------------- +# PostgreSQL pgProfile delta queries (parameterised with {begin_sample}/{end_sample}) +# --------------------------------------------------------------------------- +_PG_SNAP_TOP_SQL = """ + SELECT + sl.queryid::text AS queryid, + SUM(ss.exec_time) / 1000.0 AS elapsed_sec, + SUM(ss.calls) AS executions, + SUM(ss.shared_blks_hit) AS shared_blks_hit, + SUM(ss.shared_blks_read) AS shared_blks_read, + SUM(ss.rows) AS rows_processed + FROM profile.stmt_list sl + JOIN profile.sample_statements ss ON sl.queryid_md5 = ss.queryid_md5 + WHERE ss.sample_id BETWEEN {begin_sample} AND {end_sample} + GROUP BY sl.queryid + ORDER BY elapsed_sec DESC + LIMIT 20 +""" + +_PG_SNAP_WAIT_EVENTS = """ + SELECT + event_type, + event, + SUM(tot_waited)::numeric AS time_waited_sec, + SUM(tot_waits) AS total_waits + FROM profile.wait_sampling_total + WHERE sample_id BETWEEN {begin_sample} AND {end_sample} + GROUP BY event_type, event + ORDER BY time_waited_sec DESC + LIMIT 15 +""" + +# PostgreSQL pg_stat_statements cumulative (no snap range - latest snapshot) +_PG_STAT_TOP_SQL = """ + SELECT + queryid::text AS queryid, + LEFT(query, 120) AS query_text, + ROUND((total_exec_time / 1000)::numeric, 2) AS elapsed_sec, + 
calls AS executions, + shared_blks_hit, + shared_blks_read, + rows AS rows_processed, + ROUND((mean_exec_time / 1000)::numeric, 4) AS avg_elapsed_sec + FROM pg_stat_statements + WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) + AND queryid IS NOT NULL + AND query NOT LIKE 'SET %%' + AND query NOT LIKE 'RESET %%' + AND query NOT LIKE 'BEGIN%%' + AND query NOT LIKE 'COMMIT%%' + AND query NOT LIKE 'ROLLBACK%%' + ORDER BY total_exec_time DESC + LIMIT 20 +""" + +_PG_DB_STATS = """ + SELECT + xact_commit, xact_rollback, + blks_read, blks_hit, + tup_returned, tup_fetched, + tup_inserted, tup_updated, tup_deleted, + temp_files, temp_bytes + FROM pg_stat_database + WHERE datname = current_database() +""" + + +# --------------------------------------------------------------------------- +# Comparison engine +# --------------------------------------------------------------------------- +class SnapshotComparator: + """Compare two snapshot ranges and produce delta metrics + Plotly charts.""" + + def __init__(self, db_client: BaseDBClient, llm_client: LLMClient) -> None: + self.db = db_client + self.llm = llm_client + self.is_oracle = db_client.db_type == DB_TYPE_ORACLE + + # -- public API ---------------------------------------------------------- + + def compare_oracle( + self, + snap_a_begin: int, + snap_a_end: int, + snap_b_begin: int, + snap_b_end: int, + ) -> dict[str, Any]: + """Compare two AWR snap-ID ranges and return metrics + figures.""" + data_a = self._collect_oracle_snap(snap_a_begin, snap_a_end) + data_b = self._collect_oracle_snap(snap_b_begin, snap_b_end) + label_a = f"Snap {snap_a_begin}\u2013{snap_a_end}" + label_b = f"Snap {snap_b_begin}\u2013{snap_b_end}" + return self._build_comparison(data_a, data_b, label_a, label_b) + + def compare_pgprofile( + self, + samp_a_begin: int, + samp_a_end: int, + samp_b_begin: int, + samp_b_end: int, + ) -> dict[str, Any]: + """Compare two pgProfile sample-ID ranges.""" + data_a = 
self._collect_pg_snap(samp_a_begin, samp_a_end) + data_b = self._collect_pg_snap(samp_b_begin, samp_b_end) + label_a = f"Sample {samp_a_begin}\u2013{samp_a_end}" + label_b = f"Sample {samp_b_begin}\u2013{samp_b_end}" + return self._build_comparison(data_a, data_b, label_a, label_b) + + # -- data collection ----------------------------------------------------- + + def _run_query(self, sql: str) -> list[dict[str, Any]]: + result = self.db.execute_query(sql) + if "error" in result: + logger.warning("Query error: %s", result["error"]) + return [] + return result.get("rows", []) + + def _collect_oracle_snap(self, begin: int, end: int) -> dict[str, Any]: + fmt = {"begin_snap": str(begin), "end_snap": str(end)} + return { + "top_sql": self._run_query(_ORA_SNAP_TOP_SQL.format(**fmt)), + "top_elapsed": self._run_query(_ORA_SNAP_TOP_ELAPSED.format(**fmt)), + "wait_events": self._run_query(_ORA_SNAP_WAIT_EVENTS.format(**fmt)), + "sys_stats": self._run_query(_ORA_SNAP_SYS_STATS.format(**fmt)), + "snap_range": f"{begin}-{end}", + } + + def _collect_pg_snap(self, begin: int, end: int) -> dict[str, Any]: + fmt = {"begin_sample": str(begin), "end_sample": str(end)} + return { + "top_sql": self._run_query(_PG_SNAP_TOP_SQL.format(**fmt)), + "wait_events": self._run_query(_PG_SNAP_WAIT_EVENTS.format(**fmt)), + "snap_range": f"{begin}-{end}", + } + + # -- comparison logic ---------------------------------------------------- + + def _build_comparison( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> dict[str, Any]: + figures: list[dict[str, Any]] = [] + + # 1) Top SQL by elapsed time - grouped bar chart + fig_sql = self._chart_top_sql_elapsed(data_a, data_b, label_a, label_b) + if fig_sql: + figures.append({"title": "Top SQL by Elapsed Time", "fig": fig_sql}) + + # 2) Top SQL by executions - grouped bar chart + fig_exec = self._chart_top_sql_executions(data_a, data_b, label_a, label_b) + if fig_exec: + figures.append({"title": "Top SQL 
by Executions", "fig": fig_exec}) + + # 3) Top SQL by buffer gets - grouped bar chart + fig_buf = self._chart_top_sql_buffer_gets(data_a, data_b, label_a, label_b) + if fig_buf: + figures.append({"title": "Top SQL by Buffer Gets", "fig": fig_buf}) + + # 4) Wait events comparison - grouped bar chart + fig_waits = self._chart_wait_events(data_a, data_b, label_a, label_b) + if fig_waits: + figures.append({"title": "Wait Events Comparison", "fig": fig_waits}) + + # 5) Wait events by class/type - pie charts side by side + fig_pie = self._chart_wait_pie(data_a, data_b, label_a, label_b) + if fig_pie: + figures.append({"title": "Wait Time Distribution", "fig": fig_pie}) + + # 6) System stats comparison (Oracle only) + if self.is_oracle: + fig_sys = self._chart_sys_stats(data_a, data_b, label_a, label_b) + if fig_sys: + figures.append({"title": "System Statistics Delta", "fig": fig_sys}) + + # 7) SQL elapsed per execution (Oracle only - has top_elapsed) + if self.is_oracle: + fig_avg = self._chart_avg_elapsed(data_a, data_b, label_a, label_b) + if fig_avg: + figures.append( + {"title": "Avg Elapsed per Execution (Top SQL)", "fig": fig_avg} + ) + + # Build delta summary table + delta_table = self._build_delta_table(data_a, data_b, label_a, label_b) + + # LLM comparison summary + comparison_text = self._format_comparison_text( + data_a, data_b, label_a, label_b, delta_table + ) + analysis = self._get_llm_comparison(comparison_text) + + return { + "figures": figures, + "delta_table": delta_table, + "data_a": data_a, + "data_b": data_b, + "label_a": label_a, + "label_b": label_b, + "analysis": analysis, + } + + # -- chart builders ------------------------------------------------------ + + def _chart_top_sql_elapsed( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> go.Figure | None: + sql_a = data_a.get("top_sql", []) + sql_b = data_b.get("top_sql", []) + if not sql_a and not sql_b: + return None + + id_key = "sql_id" if 
self.is_oracle else "queryid" + all_ids = [] + map_a: dict[str, float] = {} + map_b: dict[str, float] = {} + + for row in sql_a[:10]: + sid = str(row.get(id_key, "")) + if sid: + all_ids.append(sid) + map_a[sid] = float(row.get("elapsed_sec", 0)) + for row in sql_b[:10]: + sid = str(row.get(id_key, "")) + if sid and sid not in all_ids: + all_ids.append(sid) + map_b[sid] = float(row.get("elapsed_sec", 0)) + + if not all_ids: + return None + + ids = all_ids[:12] + short_ids = [s[:13] for s in ids] + + fig = go.Figure() + fig.add_trace( + go.Bar( + name=label_a, + x=short_ids, + y=[map_a.get(i, 0) for i in ids], + marker_color="#636EFA", + ) + ) + fig.add_trace( + go.Bar( + name=label_b, + x=short_ids, + y=[map_b.get(i, 0) for i in ids], + marker_color="#EF553B", + ) + ) + fig.update_layout( + barmode="group", + title="Top SQL \u2014 Elapsed Time (seconds)", + xaxis_title="SQL ID" if self.is_oracle else "Query ID", + yaxis_title="Elapsed (sec)", + height=420, + legend=dict(orientation="h", yanchor="bottom", y=1.02), + ) + return fig + + def _chart_top_sql_executions( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> go.Figure | None: + sql_a = data_a.get("top_sql", []) + sql_b = data_b.get("top_sql", []) + if not sql_a and not sql_b: + return None + + id_key = "sql_id" if self.is_oracle else "queryid" + all_ids: list[str] = [] + map_a: dict[str, float] = {} + map_b: dict[str, float] = {} + + for row in sql_a[:10]: + sid = str(row.get(id_key, "")) + if sid: + all_ids.append(sid) + map_a[sid] = float(row.get("executions", 0)) + for row in sql_b[:10]: + sid = str(row.get(id_key, "")) + if sid and sid not in all_ids: + all_ids.append(sid) + map_b[sid] = float(row.get("executions", 0)) + + if not all_ids: + return None + + ids = all_ids[:12] + short_ids = [s[:13] for s in ids] + + fig = go.Figure() + fig.add_trace( + go.Bar( + name=label_a, + x=short_ids, + y=[map_a.get(i, 0) for i in ids], + marker_color="#636EFA", + ) + ) 
+ fig.add_trace( + go.Bar( + name=label_b, + x=short_ids, + y=[map_b.get(i, 0) for i in ids], + marker_color="#EF553B", + ) + ) + fig.update_layout( + barmode="group", + title="Top SQL \u2014 Executions", + xaxis_title="SQL ID" if self.is_oracle else "Query ID", + yaxis_title="Executions", + height=420, + legend=dict(orientation="h", yanchor="bottom", y=1.02), + ) + return fig + + def _chart_top_sql_buffer_gets( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> go.Figure | None: + sql_a = data_a.get("top_sql", []) + sql_b = data_b.get("top_sql", []) + if not sql_a and not sql_b: + return None + + id_key = "sql_id" if self.is_oracle else "queryid" + buf_key = "buffer_gets" if self.is_oracle else "shared_blks_hit" + all_ids: list[str] = [] + map_a: dict[str, float] = {} + map_b: dict[str, float] = {} + + for row in sql_a[:10]: + sid = str(row.get(id_key, "")) + if sid: + all_ids.append(sid) + map_a[sid] = float(row.get(buf_key, 0)) + for row in sql_b[:10]: + sid = str(row.get(id_key, "")) + if sid and sid not in all_ids: + all_ids.append(sid) + map_b[sid] = float(row.get(buf_key, 0)) + + if not all_ids: + return None + + ids = all_ids[:12] + short_ids = [s[:13] for s in ids] + + fig = go.Figure() + fig.add_trace( + go.Bar( + name=label_a, + x=short_ids, + y=[map_a.get(i, 0) for i in ids], + marker_color="#636EFA", + ) + ) + fig.add_trace( + go.Bar( + name=label_b, + x=short_ids, + y=[map_b.get(i, 0) for i in ids], + marker_color="#EF553B", + ) + ) + fig.update_layout( + barmode="group", + title=f"Top SQL \u2014 {'Buffer Gets' if self.is_oracle else 'Shared Blocks Hit'}", + xaxis_title="SQL ID" if self.is_oracle else "Query ID", + yaxis_title="Buffer Gets" if self.is_oracle else "Shared Blocks Hit", + height=420, + legend=dict(orientation="h", yanchor="bottom", y=1.02), + ) + return fig + + def _chart_wait_events( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> 
go.Figure | None: + wa = data_a.get("wait_events", []) + wb = data_b.get("wait_events", []) + if not wa and not wb: + return None + + all_events: list[str] = [] + map_a: dict[str, float] = {} + map_b: dict[str, float] = {} + + for row in wa[:10]: + evt = str(row.get("event", "")) + if evt: + all_events.append(evt) + map_a[evt] = float(row.get("time_waited_sec", 0)) + for row in wb[:10]: + evt = str(row.get("event", "")) + if evt and evt not in all_events: + all_events.append(evt) + map_b[evt] = float(row.get("time_waited_sec", 0)) + + if not all_events: + return None + + events = all_events[:12] + short_events = [e[:30] for e in events] + + fig = go.Figure() + fig.add_trace( + go.Bar( + name=label_a, + x=short_events, + y=[map_a.get(e, 0) for e in events], + marker_color="#636EFA", + ) + ) + fig.add_trace( + go.Bar( + name=label_b, + x=short_events, + y=[map_b.get(e, 0) for e in events], + marker_color="#EF553B", + ) + ) + fig.update_layout( + barmode="group", + title="Wait Events \u2014 Time Waited (seconds)", + xaxis_title="Event", + yaxis_title="Time Waited (sec)", + height=420, + legend=dict(orientation="h", yanchor="bottom", y=1.02), + ) + return fig + + def _chart_wait_pie( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> go.Figure | None: + wa = data_a.get("wait_events", []) + wb = data_b.get("wait_events", []) + if not wa and not wb: + return None + + class_key = "wait_class" if self.is_oracle else "event_type" + + def aggregate_by_class(rows: list[dict]) -> tuple[list[str], list[float]]: + agg: dict[str, float] = {} + for row in rows: + cls = str(row.get(class_key, "Other")) + agg[cls] = agg.get(cls, 0) + float(row.get("time_waited_sec", 0)) + labels = list(agg.keys()) + values = list(agg.values()) + return labels, values + + labels_a, values_a = aggregate_by_class(wa) + labels_b, values_b = aggregate_by_class(wb) + + if not values_a and not values_b: + return None + + fig = make_subplots( + rows=1, + 
cols=2, + specs=[[{"type": "pie"}, {"type": "pie"}]], + subplot_titles=[label_a, label_b], + ) + if values_a: + fig.add_trace( + go.Pie(labels=labels_a, values=values_a, hole=0.35, name=label_a), + row=1, + col=1, + ) + if values_b: + fig.add_trace( + go.Pie(labels=labels_b, values=values_b, hole=0.35, name=label_b), + row=1, + col=2, + ) + fig.update_layout( + title="Wait Time Distribution by Class", + height=400, + ) + return fig + + def _chart_sys_stats( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> go.Figure | None: + sa = data_a.get("sys_stats", []) + sb = data_b.get("sys_stats", []) + if not sa and not sb: + return None + + map_a: dict[str, float] = {} + map_b: dict[str, float] = {} + all_names: list[str] = [] + + for row in sa: + name = str(row.get("name", "")) + if name: + all_names.append(name) + map_a[name] = float(row.get("value", 0)) + for row in sb: + name = str(row.get("name", "")) + if name and name not in all_names: + all_names.append(name) + map_b[name] = float(row.get("value", 0)) + + if not all_names: + return None + + fig = go.Figure() + fig.add_trace( + go.Bar( + name=label_a, + x=all_names, + y=[map_a.get(n, 0) for n in all_names], + marker_color="#636EFA", + ) + ) + fig.add_trace( + go.Bar( + name=label_b, + x=all_names, + y=[map_b.get(n, 0) for n in all_names], + marker_color="#EF553B", + ) + ) + fig.update_layout( + barmode="group", + title="System Statistics Comparison", + xaxis_title="Statistic", + yaxis_title="Value", + height=450, + xaxis_tickangle=-35, + legend=dict(orientation="h", yanchor="bottom", y=1.02), + ) + return fig + + def _chart_avg_elapsed( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> go.Figure | None: + ea = data_a.get("top_elapsed", []) + eb = data_b.get("top_elapsed", []) + if not ea and not eb: + return None + + all_ids: list[str] = [] + map_a: dict[str, float] = {} + map_b: dict[str, float] = {} + + for row in 
ea[:10]: + sid = str(row.get("sql_id", "")) + if sid: + all_ids.append(sid) + map_a[sid] = float(row.get("avg_elapsed_sec", 0)) + for row in eb[:10]: + sid = str(row.get("sql_id", "")) + if sid and sid not in all_ids: + all_ids.append(sid) + map_b[sid] = float(row.get("avg_elapsed_sec", 0)) + + if not all_ids: + return None + + ids = all_ids[:12] + short_ids = [s[:13] for s in ids] + + fig = go.Figure() + fig.add_trace( + go.Bar( + name=label_a, + x=short_ids, + y=[map_a.get(i, 0) for i in ids], + marker_color="#636EFA", + ) + ) + fig.add_trace( + go.Bar( + name=label_b, + x=short_ids, + y=[map_b.get(i, 0) for i in ids], + marker_color="#EF553B", + ) + ) + fig.update_layout( + barmode="group", + title="Avg Elapsed per Execution (seconds)", + xaxis_title="SQL ID", + yaxis_title="Avg Elapsed (sec)", + height=420, + legend=dict(orientation="h", yanchor="bottom", y=1.02), + ) + return fig + + # -- delta summary table ------------------------------------------------- + + def _build_delta_table( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + ) -> list[dict[str, Any]]: + """Build a summary table of key metric deltas between snapshots.""" + rows: list[dict[str, Any]] = [] + + # Total elapsed time across top SQL + total_a = sum(float(r.get("elapsed_sec", 0)) for r in data_a.get("top_sql", [])) + total_b = sum(float(r.get("elapsed_sec", 0)) for r in data_b.get("top_sql", [])) + rows.append( + self._delta_row( + "Total Top SQL Elapsed (sec)", total_a, total_b, label_a, label_b + ) + ) + + # Total executions across top SQL + exec_a = sum(float(r.get("executions", 0)) for r in data_a.get("top_sql", [])) + exec_b = sum(float(r.get("executions", 0)) for r in data_b.get("top_sql", [])) + rows.append( + self._delta_row( + "Total Top SQL Executions", exec_a, exec_b, label_a, label_b + ) + ) + + # Total wait time + wait_a = sum( + float(r.get("time_waited_sec", 0)) for r in data_a.get("wait_events", []) + ) + wait_b = sum( + 
float(r.get("time_waited_sec", 0)) for r in data_b.get("wait_events", []) + ) + rows.append( + self._delta_row("Total Wait Time (sec)", wait_a, wait_b, label_a, label_b) + ) + + # Buffer gets / shared blocks + buf_key = "buffer_gets" if self.is_oracle else "shared_blks_hit" + buf_a = sum(float(r.get(buf_key, 0)) for r in data_a.get("top_sql", [])) + buf_b = sum(float(r.get(buf_key, 0)) for r in data_b.get("top_sql", [])) + buf_label = "Buffer Gets" if self.is_oracle else "Shared Blocks Hit" + rows.append( + self._delta_row(f"Total {buf_label}", buf_a, buf_b, label_a, label_b) + ) + + # Disk reads / shared blocks read + disk_key = "disk_reads" if self.is_oracle else "shared_blks_read" + disk_a = sum(float(r.get(disk_key, 0)) for r in data_a.get("top_sql", [])) + disk_b = sum(float(r.get(disk_key, 0)) for r in data_b.get("top_sql", [])) + disk_label = "Disk Reads" if self.is_oracle else "Shared Blocks Read" + rows.append( + self._delta_row(f"Total {disk_label}", disk_a, disk_b, label_a, label_b) + ) + + # Oracle-specific system stats + if self.is_oracle: + stats_a = { + str(r.get("name", "")): float(r.get("value", 0)) + for r in data_a.get("sys_stats", []) + } + stats_b = { + str(r.get("name", "")): float(r.get("value", 0)) + for r in data_b.get("sys_stats", []) + } + for stat_name in [ + "physical reads", + "parse count (hard)", + "execute count", + "user commits", + "enqueue waits", + ]: + va = stats_a.get(stat_name, 0) + vb = stats_b.get(stat_name, 0) + if va or vb: + rows.append( + self._delta_row(stat_name.title(), va, vb, label_a, label_b) + ) + + return rows + + @staticmethod + def _delta_row( + metric: str, val_a: float, val_b: float, label_a: str, label_b: str + ) -> dict[str, Any]: + delta = val_b - val_a + pct = (delta / val_a * 100) if val_a else 0 + direction = "+" if delta > 0 else ("-" if delta < 0 else "=") + return { + "metric": metric, + label_a: round(val_a, 2), + label_b: round(val_b, 2), + "delta": round(delta, 2), + "change_pct": 
f"{direction}{abs(pct):.1f}%", + } + + # -- LLM comparison analysis --------------------------------------------- + + def _format_comparison_text( + self, + data_a: dict[str, Any], + data_b: dict[str, Any], + label_a: str, + label_b: str, + delta_table: list[dict[str, Any]], + ) -> str: + parts = [ + f"SNAPSHOT COMPARISON REPORT\n{'=' * 60}", + f"Snapshot A: {label_a}", + f"Snapshot B: {label_b}\n", + "--- DELTA SUMMARY ---", + ] + for row in delta_table: + parts.append( + f" {row['metric']}: {row[label_a]} -> {row[label_b]} " + f"(delta={row['delta']}, {row['change_pct']})" + ) + + parts.append("\n--- SNAPSHOT A: TOP SQL ---") + for i, row in enumerate(data_a.get("top_sql", [])[:10], 1): + parts.append(f" [{i}] {_fmt(row)}") + + parts.append("\n--- SNAPSHOT B: TOP SQL ---") + for i, row in enumerate(data_b.get("top_sql", [])[:10], 1): + parts.append(f" [{i}] {_fmt(row)}") + + parts.append("\n--- SNAPSHOT A: WAIT EVENTS ---") + for i, row in enumerate(data_a.get("wait_events", [])[:10], 1): + parts.append(f" [{i}] {_fmt(row)}") + + parts.append("\n--- SNAPSHOT B: WAIT EVENTS ---") + for i, row in enumerate(data_b.get("wait_events", [])[:10], 1): + parts.append(f" [{i}] {_fmt(row)}") + + return "\n".join(parts) + + def _get_llm_comparison(self, text: str) -> str: + system_prompt = ( + "You are a senior DBA comparing two database performance snapshots. " + "Produce a detailed comparison report with these sections:\n\n" + "## Executive Summary\n" + "2-3 sentences on overall change in database health between the two periods.\n\n" + "## Key Metric Changes\n" + "For each metric that changed significantly (>10%), explain the change " + "and its likely cause. Reference specific sql_id/queryid values.\n\n" + "## New or Regressed SQL\n" + "Identify SQL that appeared in Snapshot B but not A (new workload), or SQL " + "whose elapsed time increased significantly. 
For each, explain the likely " + "cause and provide specific fix SQL (CREATE INDEX, ANALYZE, rewrite).\n\n" + "## Wait Event Changes\n" + "Highlight wait events that increased or decreased. Explain implications " + "(e.g., increased 'enq: TX - row lock contention' suggests locking issues).\n\n" + "## Recommendations\n" + "Numbered action plan sorted by impact. Each item must include:\n" + "- The specific sql_id/queryid/object affected\n" + "- The exact SQL command to execute\n" + "- Expected improvement\n\n" + "IMPORTANT: Be SPECIFIC. Always reference sql_id, queryid, or table names. " + "Never give generic advice. Use markdown code blocks for SQL." + ) + try: + return self.llm.generate(prompt=text, system_prompt=system_prompt) + except (ConnectionError, RuntimeError) as exc: + return f"LLM comparison analysis failed: {exc}" + + +def _fmt(row: dict[str, Any]) -> str: + """Format a row dict compactly.""" + return ", ".join(f"{k}={v}" for k, v in row.items() if v is not None) From bcd80b1de106ce0863b896247d90e1e14246880c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:20:48 +0000 Subject: [PATCH 12/19] Fix AttributeError in Compare Snapshots tab: list_awr_snapshots returns list not dict --- tools/pg-assistant/app.py | 236 ++++++++++++++++++-------------------- 1 file changed, 114 insertions(+), 122 deletions(-) diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index 6621141..12fc190 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -1101,70 +1101,66 @@ def _render_comparison(result: dict) -> None: analyser_cmp = PerformanceAnalyser( st.session_state.db_client, st.session_state.llm_client ) - snap_result = analyser_cmp.list_awr_snapshots() - if "error" in snap_result: - st.error(f"Cannot load snapshots: {snap_result['error']}") + snaps = analyser_cmp.list_awr_snapshots() + if not snaps: + st.info("No AWR snapshots found.") else: - snaps = 
snap_result.get("rows", []) - if not snaps: - st.info("No AWR snapshots found.") - else: - snap_ids = sorted( - {int(s["snap_id"]) for s in snaps if s.get("snap_id")} + snap_ids = sorted( + {int(s["snap_id"]) for s in snaps if s.get("snap_id")} + ) + snap_labels = { + int(s["snap_id"]): ( + f"{s['snap_id']} - {s.get('end_interval_time', '')}" + ) + for s in snaps + if s.get("snap_id") + } + + col_a, col_b = st.columns(2) + with col_a: + st.markdown("**Snapshot Range A (Baseline)**") + a_begin = st.selectbox( + "A \u2014 Begin Snap", + snap_ids, + index=0, + key="cmp_a_begin", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + a_end = st.selectbox( + "A \u2014 End Snap", + snap_ids, + index=min(1, len(snap_ids) - 1), + key="cmp_a_end", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + with col_b: + st.markdown("**Snapshot Range B (Current)**") + b_begin = st.selectbox( + "B \u2014 Begin Snap", + snap_ids, + index=max(0, len(snap_ids) - 2), + key="cmp_b_begin", + format_func=lambda x: snap_labels.get(x, str(x)), + ) + b_end = st.selectbox( + "B \u2014 End Snap", + snap_ids, + index=len(snap_ids) - 1, + key="cmp_b_end", + format_func=lambda x: snap_labels.get(x, str(x)), ) - snap_labels = { - int(s["snap_id"]): ( - f"{s['snap_id']} - {s.get('end_interval_time', '')}" - ) - for s in snaps - if s.get("snap_id") - } - - col_a, col_b = st.columns(2) - with col_a: - st.markdown("**Snapshot Range A (Baseline)**") - a_begin = st.selectbox( - "A \u2014 Begin Snap", - snap_ids, - index=0, - key="cmp_a_begin", - format_func=lambda x: snap_labels.get(x, str(x)), - ) - a_end = st.selectbox( - "A \u2014 End Snap", - snap_ids, - index=min(1, len(snap_ids) - 1), - key="cmp_a_end", - format_func=lambda x: snap_labels.get(x, str(x)), - ) - with col_b: - st.markdown("**Snapshot Range B (Current)**") - b_begin = st.selectbox( - "B \u2014 Begin Snap", - snap_ids, - index=max(0, len(snap_ids) - 2), - key="cmp_b_begin", - format_func=lambda x: snap_labels.get(x, str(x)), - ) 
- b_end = st.selectbox( - "B \u2014 End Snap", - snap_ids, - index=len(snap_ids) - 1, - key="cmp_b_end", - format_func=lambda x: snap_labels.get(x, str(x)), - ) - if st.button("\U0001f50d Compare Snapshots", key="cmp_ora_btn"): - if a_begin >= a_end: - st.error("Range A: Begin snap must be less than End snap.") - elif b_begin >= b_end: - st.error("Range B: Begin snap must be less than End snap.") - else: - with st.spinner("Comparing snapshots\u2026"): - result = comparator.compare_oracle( - a_begin, a_end, b_begin, b_end - ) - _render_comparison(result) + if st.button("\U0001f50d Compare Snapshots", key="cmp_ora_btn"): + if a_begin >= a_end: + st.error("Range A: Begin snap must be less than End snap.") + elif b_begin >= b_end: + st.error("Range B: Begin snap must be less than End snap.") + else: + with st.spinner("Comparing snapshots\u2026"): + result = comparator.compare_oracle( + a_begin, a_end, b_begin, b_end + ) + _render_comparison(result) elif db_type == DB_TYPE_POSTGRESQL: cmp_mode = st.radio( @@ -1178,70 +1174,66 @@ def _render_comparison(result: dict) -> None: analyser_cmp = PerformanceAnalyser( st.session_state.db_client, st.session_state.llm_client ) - samp_result = analyser_cmp.list_pgprofile_samples() - if "error" in samp_result: - st.error(f"Cannot load pgProfile samples: {samp_result['error']}") + samps = analyser_cmp.list_pgprofile_samples() + if not samps: + st.info("No pgProfile samples found.") else: - samps = samp_result.get("rows", []) - if not samps: - st.info("No pgProfile samples found.") - else: - samp_ids = sorted( - {int(s["sample_id"]) for s in samps if s.get("sample_id")} + samp_ids = sorted( + {int(s["sample_id"]) for s in samps if s.get("sample_id")} + ) + samp_labels = { + int(s["sample_id"]): ( + f"{s['sample_id']} - {s.get('sample_time', '')}" ) - samp_labels = { - int(s["sample_id"]): ( - f"{s['sample_id']} - {s.get('sample_time', '')}" - ) - for s in samps - if s.get("sample_id") - } - - col_a, col_b = st.columns(2) - with col_a: 
- st.markdown("**Sample Range A (Baseline)**") - sa_begin = st.selectbox( - "A \u2014 Begin Sample", - samp_ids, - index=0, - key="cmp_sa_begin", - format_func=lambda x: samp_labels.get(x, str(x)), - ) - sa_end = st.selectbox( - "A \u2014 End Sample", - samp_ids, - index=min(1, len(samp_ids) - 1), - key="cmp_sa_end", - format_func=lambda x: samp_labels.get(x, str(x)), - ) - with col_b: - st.markdown("**Sample Range B (Current)**") - sb_begin = st.selectbox( - "B \u2014 Begin Sample", - samp_ids, - index=max(0, len(samp_ids) - 2), - key="cmp_sb_begin", - format_func=lambda x: samp_labels.get(x, str(x)), - ) - sb_end = st.selectbox( - "B \u2014 End Sample", - samp_ids, - index=len(samp_ids) - 1, - key="cmp_sb_end", - format_func=lambda x: samp_labels.get(x, str(x)), - ) + for s in samps + if s.get("sample_id") + } - if st.button("\U0001f50d Compare Samples", key="cmp_pg_btn"): - if sa_begin >= sa_end: - st.error("Range A: Begin must be less than End.") - elif sb_begin >= sb_end: - st.error("Range B: Begin must be less than End.") - else: - with st.spinner("Comparing samples\u2026"): - result = comparator.compare_pgprofile( - sa_begin, sa_end, sb_begin, sb_end - ) - _render_comparison(result) + col_a, col_b = st.columns(2) + with col_a: + st.markdown("**Sample Range A (Baseline)**") + sa_begin = st.selectbox( + "A \u2014 Begin Sample", + samp_ids, + index=0, + key="cmp_sa_begin", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + sa_end = st.selectbox( + "A \u2014 End Sample", + samp_ids, + index=min(1, len(samp_ids) - 1), + key="cmp_sa_end", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + with col_b: + st.markdown("**Sample Range B (Current)**") + sb_begin = st.selectbox( + "B \u2014 Begin Sample", + samp_ids, + index=max(0, len(samp_ids) - 2), + key="cmp_sb_begin", + format_func=lambda x: samp_labels.get(x, str(x)), + ) + sb_end = st.selectbox( + "B \u2014 End Sample", + samp_ids, + index=len(samp_ids) - 1, + key="cmp_sb_end", + format_func=lambda 
x: samp_labels.get(x, str(x)), + ) + + if st.button("\U0001f50d Compare Samples", key="cmp_pg_btn"): + if sa_begin >= sa_end: + st.error("Range A: Begin must be less than End.") + elif sb_begin >= sb_end: + st.error("Range B: Begin must be less than End.") + else: + with st.spinner("Comparing samples\u2026"): + result = comparator.compare_pgprofile( + sa_begin, sa_end, sb_begin, sb_end + ) + _render_comparison(result) else: st.info( From af3381824c7c7e557bd5dd1f94ed6707b403a495 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 09:49:18 +0000 Subject: [PATCH 13/19] Detect PostgreSQL version and use version-aware bgwriter/checkpoint queries (PG 17+ compat) --- tools/pg-assistant/auto_analyse.py | 55 +++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index 7dd305e..45066de 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -380,7 +380,9 @@ WHERE datname = current_database() """ -_PG_BGWRITER = """ +# PostgreSQL < 17: checkpoint columns live in pg_stat_bgwriter. +# PostgreSQL >= 17: they moved to pg_stat_checkpointer with renamed columns. 
+_PG_BGWRITER_LEGACY = """ SELECT checkpoints_timed, checkpoints_req, buffers_checkpoint, buffers_clean, buffers_backend, @@ -388,6 +390,18 @@ FROM pg_stat_bgwriter """ +_PG_BGWRITER_V17 = """ + SELECT + num_timed AS checkpoints_timed, + num_requested AS checkpoints_req, + buffers_written AS buffers_checkpoint, + bg.buffers_clean, + bg.buffers_alloc AS buffers_backend, + bg.maxwritten_clean + FROM pg_stat_checkpointer cp + CROSS JOIN pg_stat_bgwriter bg +""" + _PG_UNUSED_INDEXES = """ SELECT schemaname, relname, indexrelname, @@ -753,7 +767,7 @@ ORDER BY count DESC """ -_PG_CHECKPOINT_STATS = """ +_PG_CHECKPOINT_STATS_LEGACY = """ SELECT checkpoints_timed, checkpoints_req, @@ -767,6 +781,21 @@ FROM pg_stat_bgwriter """ +_PG_CHECKPOINT_STATS_V17 = """ + SELECT + cp.num_timed AS checkpoints_timed, + cp.num_requested AS checkpoints_req, + cp.buffers_written AS buffers_checkpoint, + bg.buffers_clean, + bg.buffers_alloc AS buffers_backend, + bg.maxwritten_clean, + ROUND(bg.buffers_alloc::numeric / + GREATEST(cp.buffers_written + bg.buffers_clean + bg.buffers_alloc, 1) + * 100, 2) AS backend_write_pct + FROM pg_stat_checkpointer cp + CROSS JOIN pg_stat_bgwriter bg +""" + ANALYSIS_SYSTEM_PROMPT = ( "You are a senior DBA and database performance engineer performing a deep-dive " "analysis. You have been given detailed performance data including SQL IDs/query IDs, " @@ -1049,8 +1078,26 @@ def _collect_pgprofile(self, begin_sample: int, end_sample: int) -> dict[str, An # -- PostgreSQL collection ----------------------------------------------- + def _get_pg_major_version(self) -> int: + """Return the PostgreSQL major version number (e.g. 14, 15, 16, 17).""" + result = self.db_client.execute_query( + "SELECT current_setting('server_version_num')::int AS ver" + ) + if "error" in result: + return 0 + rows = result.get("rows", []) + if rows: + # server_version_num is e.g. 
170001 for 17.1, 160004 for 16.4 + return int(rows[0].get("ver", 0)) // 10000 + return 0 + def _collect_postgresql(self) -> dict[str, Any]: sections: dict[str, Any] = {} + pg_major = self._get_pg_major_version() + bgwriter_sql = _PG_BGWRITER_V17 if pg_major >= 17 else _PG_BGWRITER_LEGACY + checkpoint_sql = ( + _PG_CHECKPOINT_STATS_V17 if pg_major >= 17 else _PG_CHECKPOINT_STATS_LEGACY + ) queries = { "top_cpu_queries": _PG_TOP_CPU_QUERIES, "top_queries": _PG_TOP_QUERIES, @@ -1061,14 +1108,14 @@ def _collect_postgresql(self) -> dict[str, Any]: "stale_stats_vacuum": _PG_STALE_STATS, "table_stats": _PG_TABLE_STATS, "database_stats": _PG_DB_STATS, - "bgwriter_stats": _PG_BGWRITER, + "bgwriter_stats": bgwriter_sql, "unused_indexes": _PG_UNUSED_INDEXES, "lock_waits": _PG_LOCK_WAITS, "bloat_estimate": _PG_BLOAT_ESTIMATE, "sequence_cache_issues": _PG_SEQUENCE_CACHE, "temp_file_usage": _PG_TEMP_FILE_USAGE, "connection_stats": _PG_CONNECTION_STATS, - "checkpoint_stats": _PG_CHECKPOINT_STATS, + "checkpoint_stats": checkpoint_sql, } for name, sql in queries.items(): result = self.db_client.execute_query(sql) From 740214931d75f220395244efcfc00ff010fd4d83 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 10:36:44 +0000 Subject: [PATCH 14/19] Fix LLM hallucination: simplify system prompts, remove example placeholders, add data-grounding instructions --- tools/pg-assistant/auto_analyse.py | 82 +++++++------------------- tools/pg-assistant/snapshot_compare.py | 24 +++----- 2 files changed, 28 insertions(+), 78 deletions(-) diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index 45066de..f15857f 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -797,74 +797,29 @@ """ ANALYSIS_SYSTEM_PROMPT = ( - "You are a senior DBA and database performance engineer performing a deep-dive " - "analysis. 
You have been given detailed performance data including SQL IDs/query IDs, " - "execution plans, full table scans, existing indexes, stats freshness, row contention " - "events, sequence caching issues, and other best-practice metrics.\n\n" - "Produce the following sections:\n\n" + "You are a senior DBA analysing REAL performance data from a live database.\n\n" + "CRITICAL RULES — violating any of these makes your analysis useless:\n" + "- ONLY reference sql_ids, queryids, table names, and SQL text that appear " + "in the data below. NEVER invent fake IDs or placeholder names.\n" + "- If a section has '(no data)', say 'No issues found' and move on.\n" + "- For every problematic SQL, copy the ACTUAL query text from the data into " + "a ```sql code block.\n" + "- Provide EXACT fix commands (CREATE INDEX, ANALYZE, ALTER SEQUENCE, etc.) " + "with real table/column names from the data.\n" + "- Skip any section where the data shows no problems.\n" + "- Never output generic advice or example/template text.\n\n" + "Produce these sections (skip sections with no relevant data):\n" "## Executive Summary\n" - "2-3 sentences summarising the overall database health and biggest concern.\n\n" "## High Elapsed Time SQL\n" - "For EACH SQL with high average elapsed time per execution (reference sql_id/queryid):\n" - "- Quote the sql_id / queryid, avg elapsed, total elapsed, and a snippet\n" - "- Explain WHY it is slow (full table scan, missing index, bad join, bad stats)\n" - "- Provide the EXACT fix SQL (CREATE INDEX, ANALYZE, rewrite, etc.)\n\n" "## High Execution Count SQL\n" - "For SQL executed thousands of times:\n" - "- Even small per-execution cost adds up; flag these with sql_id/queryid\n" - "- Suggest caching, batching, or query consolidation where applicable\n" - "- Provide exact fix SQL if index or rewrite would help\n\n" "## Full Table Scans\n" - "List every table being full-scanned with the sql_id causing it.\n" - "- For each, check the existing indexes section — if an 
index already exists " - "that should have been used, suggest gathering fresh stats or checking predicates.\n" - "- If no suitable index exists, provide the exact CREATE INDEX statement.\n\n" - "## Row Contention & Locking Issues\n" - "Analyse the row contention / enqueue wait events data:\n" - "- Flag 'enq: TX - row lock contention' and similar events with wait times\n" - "- Identify the likely cause (hot blocks, ITL contention, poor sequence caching)\n" - "- Provide fixes: increase INITRANS, reduce transaction scope, batch commits\n\n" + "## Row Contention & Locking\n" "## Sequence Caching Issues\n" - "For sequences with NOCACHE or CACHE 1:\n" - "- Explain the performance impact (row cache lock waits, redo contention)\n" - "- Provide exact ALTER SEQUENCE ... CACHE 20 (or higher) statements\n" - "- Flag ORDER sequences that may need NOORDER for better performance\n\n" "## Missing / Recommended Indexes\n" - "Based on the query patterns (WHERE, JOIN, ORDER BY columns visible in SQL text), " - "suggest specific CREATE INDEX statements. Reference the sql_id/queryid that " - "would benefit.\n\n" - "## Stale Statistics / Vacuum / Bloat Issues\n" - "List tables with stale or missing stats. Provide exact ANALYZE / DBMS_STATS " - "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM " - "and estimate bloat. For Oracle, flag tables not analysed in 7+ days.\n\n" - "## Temp Space / Sort Issues\n" - "Flag queries spilling to temp (temp_blks_read/written for PG, sorts (disk) for " - "Oracle). Suggest work_mem increase, index to avoid sort, or query rewrite.\n\n" + "## Stale Statistics / Vacuum / Bloat\n" "## Unused Indexes\n" - "List indexes that have never been scanned and recommend dropping them " - "(provide DROP INDEX statements).\n\n" - "## Checkpoint / Redo / WAL Issues\n" - "For Oracle: flag excessive redo log switches (>6/hour). " - "For PostgreSQL: flag high backend_write_pct (buffers_backend vs checkpoint). 
" - "Suggest redo log sizing or checkpoint_completion_target tuning.\n\n" + "## Checkpoint / WAL Issues\n" "## Action Plan (Priority Order)\n" - "Numbered list of actions sorted by impact. Each action must include:\n" - "- The specific sql_id / queryid / table / sequence affected\n" - "- The exact SQL command to execute\n" - "- Expected improvement\n\n" - "IMPORTANT RULES:\n" - "1. Be SPECIFIC — always reference sql_id, queryid, or table name.\n" - "2. Always QUOTE the full SQL text provided in the data alongside the sql_id/queryid. " - "Show the complete query text so the reader can understand exactly which SQL is problematic.\n" - "3. Never give generic advice like 'add indexes where needed'.\n" - "4. Exclude all system/internal queries — focus only on user application SQL.\n" - "5. Use markdown formatting with code blocks for SQL.\n" - "6. For each problematic SQL, show it in a code block like:\n" - " ```sql\n" - " -- sql_id: ABC123\n" - " SELECT ... (full query text from the data)\n" - " ```\n" - "7. Then explain the issue and provide the fix SQL in another code block." ) @@ -1131,7 +1086,12 @@ def _collect_postgresql(self) -> dict[str, Any]: def _format_report(self, data: dict[str, Any]) -> str: """Format collected data into a human-readable report for the LLM.""" db_type = data.get("db_type", "unknown") - parts = [f"DATABASE PERFORMANCE REPORT ({db_type.upper()})\n{'=' * 60}\n"] + parts = [ + f"REAL DATABASE PERFORMANCE DATA ({db_type.upper()})\n{'=' * 60}\n", + "Below is REAL data collected from a live database. " + "Analyse ONLY this data. 
Do NOT invent sql_ids, table names, or queries " + "that do not appear below.\n", + ] for section_name, section_data in data.items(): if section_name in ("db_type", "snap_range", "sample_range"): diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py index 518f7fc..0016333 100644 --- a/tools/pg-assistant/snapshot_compare.py +++ b/tools/pg-assistant/snapshot_compare.py @@ -856,27 +856,17 @@ def _format_comparison_text( def _get_llm_comparison(self, text: str) -> str: system_prompt = ( - "You are a senior DBA comparing two database performance snapshots. " - "Produce a detailed comparison report with these sections:\n\n" + "You are a senior DBA comparing two REAL database snapshots.\n\n" + "CRITICAL: ONLY reference sql_ids, queryids, table names, and SQL text " + "that appear in the data below. NEVER invent fake IDs or placeholders.\n\n" + "Produce these sections (skip sections with no relevant data):\n" "## Executive Summary\n" - "2-3 sentences on overall change in database health between the two periods.\n\n" "## Key Metric Changes\n" - "For each metric that changed significantly (>10%), explain the change " - "and its likely cause. Reference specific sql_id/queryid values.\n\n" "## New or Regressed SQL\n" - "Identify SQL that appeared in Snapshot B but not A (new workload), or SQL " - "whose elapsed time increased significantly. For each, explain the likely " - "cause and provide specific fix SQL (CREATE INDEX, ANALYZE, rewrite).\n\n" "## Wait Event Changes\n" - "Highlight wait events that increased or decreased. Explain implications " - "(e.g., increased 'enq: TX - row lock contention' suggests locking issues).\n\n" - "## Recommendations\n" - "Numbered action plan sorted by impact. Each item must include:\n" - "- The specific sql_id/queryid/object affected\n" - "- The exact SQL command to execute\n" - "- Expected improvement\n\n" - "IMPORTANT: Be SPECIFIC. Always reference sql_id, queryid, or table names. 
" - "Never give generic advice. Use markdown code blocks for SQL." + "## Recommendations\n\n" + "For each problematic SQL, copy the ACTUAL query text from the data " + "into a ```sql code block. Provide exact fix commands." ) try: return self.llm.generate(prompt=text, system_prompt=system_prompt) From fe6674fecb0e5dbcdad3c9f944fe71ee84849ca7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 10:48:30 +0000 Subject: [PATCH 15/19] Fix LLM hallucination v2: move instructions AFTER data in prompt instead of system prompt (codellama is a completion model, not instruction-following) --- tools/pg-assistant/auto_analyse.py | 58 ++++++++++++++++---------- tools/pg-assistant/snapshot_compare.py | 23 ++++++---- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index f15857f..e192163 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -796,30 +796,46 @@ CROSS JOIN pg_stat_bgwriter bg """ -ANALYSIS_SYSTEM_PROMPT = ( - "You are a senior DBA analysing REAL performance data from a live database.\n\n" - "CRITICAL RULES — violating any of these makes your analysis useless:\n" - "- ONLY reference sql_ids, queryids, table names, and SQL text that appear " - "in the data below. NEVER invent fake IDs or placeholder names.\n" - "- If a section has '(no data)', say 'No issues found' and move on.\n" - "- For every problematic SQL, copy the ACTUAL query text from the data into " - "a ```sql code block.\n" - "- Provide EXACT fix commands (CREATE INDEX, ANALYZE, ALTER SEQUENCE, etc.) " - "with real table/column names from the data.\n" - "- Skip any section where the data shows no problems.\n" - "- Never output generic advice or example/template text.\n\n" - "Produce these sections (skip sections with no relevant data):\n" +# Instruction block appended AFTER the data in the prompt. 
+# codellama is a completion model — it works best when instructions follow +# the data so it "completes" the report rather than fabricating from the +# system prompt. +_ANALYSIS_INSTRUCTION = ( + "\n\n" + "=" * 60 + "\n" + "TASK: Analyse the REAL data above. Write a report that ONLY references " + "the sql_ids, queryids, table names, and SQL text shown above. " + "Do NOT invent any IDs, table names, or queries.\n\n" + "For each section below, if the data above has no relevant rows, " + "write 'No issues found.' and move on.\n\n" "## Executive Summary\n" + "2-3 sentences about the biggest issues found in the data above.\n\n" "## High Elapsed Time SQL\n" + "List each sql_id/queryid from the HIGH ELAPSED PER EXEC section above. " + "Copy its query_text. Explain why it is slow and give a CREATE INDEX or fix.\n\n" "## High Execution Count SQL\n" + "List each sql_id/queryid from the HIGH EXECUTION COUNT section above. " + "Copy its query_text. Suggest caching or indexing.\n\n" "## Full Table Scans\n" + "List tables from the SEQ SCAN TABLES section above with high seq_scan counts. " + "Suggest CREATE INDEX statements using real column names.\n\n" "## Row Contention & Locking\n" + "List events from ROW CONTENTION or LOCK WAITS sections above. Suggest fixes.\n\n" "## Sequence Caching Issues\n" + "List sequences from SEQUENCE CACHE ISSUES section above. " + "Give ALTER SEQUENCE ... CACHE 20 statements.\n\n" "## Missing / Recommended Indexes\n" + "Based on query WHERE/JOIN columns visible in the SQL text above, " + "suggest specific CREATE INDEX statements.\n\n" "## Stale Statistics / Vacuum / Bloat\n" + "List tables from STALE STATS or BLOAT ESTIMATE sections above. " + "Give ANALYZE or VACUUM commands.\n\n" "## Unused Indexes\n" + "List indexes from UNUSED INDEXES section above. Give DROP INDEX statements.\n\n" "## Checkpoint / WAL Issues\n" - "## Action Plan (Priority Order)\n" + "Review CHECKPOINT STATS and BGWRITER STATS sections above. 
Flag any issues.\n\n" + "## Action Plan\n" + "Numbered list of fixes sorted by impact, using ONLY data from above.\n" ) @@ -900,11 +916,11 @@ def check_pg_stat_statements(self) -> bool: def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]: report_text = self._format_report(raw_data) + # Append instructions AFTER the data so codellama "completes" a + # real analysis rather than hallucinating from a system prompt. + full_prompt = report_text + _ANALYSIS_INSTRUCTION try: - llm_response = self.llm_client.generate( - prompt=report_text, - system_prompt=ANALYSIS_SYSTEM_PROMPT, - ) + llm_response = self.llm_client.generate(prompt=full_prompt) except (ConnectionError, RuntimeError) as exc: llm_response = f"LLM analysis failed: {exc}" return { @@ -914,11 +930,9 @@ def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]: } def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]: + full_prompt = report_text + _ANALYSIS_INSTRUCTION try: - llm_response = self.llm_client.generate( - prompt=report_text, - system_prompt=ANALYSIS_SYSTEM_PROMPT, - ) + llm_response = self.llm_client.generate(prompt=full_prompt) except (ConnectionError, RuntimeError) as exc: llm_response = f"LLM analysis failed: {exc}" return { diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py index 0016333..3f4cc52 100644 --- a/tools/pg-assistant/snapshot_compare.py +++ b/tools/pg-assistant/snapshot_compare.py @@ -855,21 +855,26 @@ def _format_comparison_text( return "\n".join(parts) def _get_llm_comparison(self, text: str) -> str: - system_prompt = ( - "You are a senior DBA comparing two REAL database snapshots.\n\n" - "CRITICAL: ONLY reference sql_ids, queryids, table names, and SQL text " - "that appear in the data below. 
NEVER invent fake IDs or placeholders.\n\n" - "Produce these sections (skip sections with no relevant data):\n" + # Append instructions AFTER the data so codellama "completes" a + # real analysis rather than hallucinating from a system prompt. + instruction = ( + "\n\n" + "=" * 60 + "\n" + "TASK: Compare the two snapshots above. Write a report that ONLY " + "references sql_ids, queryids, table names, and SQL text shown above. " + "Do NOT invent any IDs, table names, or queries.\n\n" "## Executive Summary\n" + "What changed between Snapshot A and Snapshot B?\n\n" "## Key Metric Changes\n" + "List metrics from the DELTA SUMMARY above that changed >10%.\n\n" "## New or Regressed SQL\n" + "SQL that appeared or got worse in Snapshot B. Copy query_text.\n\n" "## Wait Event Changes\n" - "## Recommendations\n\n" - "For each problematic SQL, copy the ACTUAL query text from the data " - "into a ```sql code block. Provide exact fix commands." + "Wait events that increased or decreased between snapshots.\n\n" + "## Recommendations\n" + "Numbered action plan using ONLY data from above.\n" ) try: - return self.llm.generate(prompt=text, system_prompt=system_prompt) + return self.llm.generate(prompt=text + instruction) except (ConnectionError, RuntimeError) as exc: return f"LLM comparison analysis failed: {exc}" From 390ce71387f16c972a244409096e8428cd4e92f5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:00:35 +0000 Subject: [PATCH 16/19] Replace LLM-based analysis with programmatic Python analysis engine - Python code now identifies all issues (high elapsed SQL, full table scans, sequence caching, stale stats, unused indexes, etc.) 
with real sql_ids, table names, and query text - LLM only provides a brief supplementary summary of pre-identified findings - Same hybrid approach applied to snapshot comparison --- tools/pg-assistant/auto_analyse.py | 521 ++++++++++++++++++++++--- tools/pg-assistant/snapshot_compare.py | 168 ++++++-- 2 files changed, 618 insertions(+), 71 deletions(-) diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index e192163..db583f2 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -796,47 +796,449 @@ CROSS JOIN pg_stat_bgwriter bg """ -# Instruction block appended AFTER the data in the prompt. -# codellama is a completion model — it works best when instructions follow -# the data so it "completes" the report rather than fabricating from the -# system prompt. -_ANALYSIS_INSTRUCTION = ( - "\n\n" - "=" * 60 + "\n" - "TASK: Analyse the REAL data above. Write a report that ONLY references " - "the sql_ids, queryids, table names, and SQL text shown above. " - "Do NOT invent any IDs, table names, or queries.\n\n" - "For each section below, if the data above has no relevant rows, " - "write 'No issues found.' and move on.\n\n" - "## Executive Summary\n" - "2-3 sentences about the biggest issues found in the data above.\n\n" - "## High Elapsed Time SQL\n" - "List each sql_id/queryid from the HIGH ELAPSED PER EXEC section above. " - "Copy its query_text. Explain why it is slow and give a CREATE INDEX or fix.\n\n" - "## High Execution Count SQL\n" - "List each sql_id/queryid from the HIGH EXECUTION COUNT section above. " - "Copy its query_text. Suggest caching or indexing.\n\n" - "## Full Table Scans\n" - "List tables from the SEQ SCAN TABLES section above with high seq_scan counts. " - "Suggest CREATE INDEX statements using real column names.\n\n" - "## Row Contention & Locking\n" - "List events from ROW CONTENTION or LOCK WAITS sections above. 
Suggest fixes.\n\n" - "## Sequence Caching Issues\n" - "List sequences from SEQUENCE CACHE ISSUES section above. " - "Give ALTER SEQUENCE ... CACHE 20 statements.\n\n" - "## Missing / Recommended Indexes\n" - "Based on query WHERE/JOIN columns visible in the SQL text above, " - "suggest specific CREATE INDEX statements.\n\n" - "## Stale Statistics / Vacuum / Bloat\n" - "List tables from STALE STATS or BLOAT ESTIMATE sections above. " - "Give ANALYZE or VACUUM commands.\n\n" - "## Unused Indexes\n" - "List indexes from UNUSED INDEXES section above. Give DROP INDEX statements.\n\n" - "## Checkpoint / WAL Issues\n" - "Review CHECKPOINT STATS and BGWRITER STATS sections above. Flag any issues.\n\n" - "## Action Plan\n" - "Numbered list of fixes sorted by impact, using ONLY data from above.\n" -) +# --------------------------------------------------------------------------- +# Programmatic analysis — Python code does the heavy lifting, not the LLM. +# --------------------------------------------------------------------------- + + +def _safe_float(val: Any, default: float = 0.0) -> float: + """Safely convert a value to float.""" + try: + return float(val) + except (TypeError, ValueError): + return default + + +def _safe_int(val: Any, default: int = 0) -> int: + """Safely convert a value to int.""" + try: + return int(val) + except (TypeError, ValueError): + return default + + +def _truncate_sql(sql_text: str, length: int = 200) -> str: + """Truncate SQL text for display.""" + if not sql_text: + return "(no SQL text)" + sql_text = str(sql_text).strip() + if len(sql_text) > length: + return sql_text[:length] + "..." + return sql_text + + +def _build_findings_report(data: dict[str, Any]) -> str: + """Analyse collected data programmatically and build a markdown report. + + This function does the actual analysis in Python code — identifying + problematic SQL, full table scans, missing indexes, etc. from the + real data. No LLM is involved in finding issues. 
+ """ + db_type = data.get("db_type", "unknown") + is_oracle = db_type == DB_TYPE_ORACLE + parts: list[str] = [] + action_items: list[str] = [] + action_idx = 0 + + parts.append(f"# Performance Analysis Report ({db_type.upper()})") + parts.append("") + + # --- High Elapsed Time SQL ------------------------------------------------ + section_key = "high_elapsed_per_exec" + rows = _get_rows(data, section_key) + parts.append("## High Elapsed Time SQL") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + sid = row.get("sql_id") or row.get("queryid") or "?" + avg_elapsed = _safe_float(row.get("avg_elapsed_sec", 0)) + total_elapsed = _safe_float( + row.get("total_elapsed_sec") or row.get("total_exec_sec", 0) + ) + execs = _safe_int(row.get("executions") or row.get("calls", 0)) + sql_text = str(row.get("sql_text") or row.get("query_text") or "") + gets = _safe_int(row.get("buffer_gets") or row.get("shared_blks_read", 0)) + parts.append( + f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — " + f"avg {avg_elapsed:.4f}s/exec, {execs} executions, " + f"total {total_elapsed:.2f}s, buffer gets/reads: {gets}" + ) + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + action_idx += 1 + action_items.append( + f"{action_idx}. **[HIGH ELAPSED]** Investigate `{sid}` " + f"(avg {avg_elapsed:.4f}s/exec). Consider adding indexes on " + f"columns used in WHERE/JOIN clauses." + ) + parts.append("") + + # --- High Execution Count SQL --------------------------------------------- + section_key = "high_execution_count" + rows = _get_rows(data, section_key) + parts.append("## High Execution Count SQL") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + sid = row.get("sql_id") or row.get("queryid") or "?" 
+ execs = _safe_int(row.get("executions") or row.get("calls", 0)) + total_elapsed = _safe_float( + row.get("total_elapsed_sec") or row.get("total_exec_sec", 0) + ) + sql_text = str(row.get("sql_text") or row.get("query_text") or "") + parts.append( + f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — " + f"{execs:,} executions, total {total_elapsed:.2f}s" + ) + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + if execs > 100000: + action_idx += 1 + action_items.append( + f"{action_idx}. **[HIGH EXEC COUNT]** `{sid}` executed " + f"{execs:,} times. Consider caching results or batching." + ) + parts.append("") + + # --- Full Table Scans ----------------------------------------------------- + fts_key = "full_table_scans" if is_oracle else "seq_scan_tables" + rows = _get_rows(data, fts_key) + parts.append("## Full Table Scans") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + if is_oracle: + table = row.get("table_name", "?") + owner = row.get("object_owner", "") + sid = row.get("sql_id", "?") + execs = _safe_int(row.get("executions", 0)) + sql_text = str(row.get("sql_text") or "") + parts.append( + f"**Table: `{owner}.{table}`** — sql_id: `{sid}`, " + f"{execs} executions" + ) + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + action_idx += 1 + action_items.append( + f"{action_idx}. **[FULL TABLE SCAN]** `{owner}.{table}` " + f"via sql_id `{sid}`. Review query and add appropriate index." 
+ ) + else: + table = row.get("relname", "?") + schema = row.get("schemaname", "public") + seq_scans = _safe_int(row.get("seq_scan", 0)) + seq_reads = _safe_int(row.get("seq_tup_read", 0)) + idx_scans = _safe_int(row.get("idx_scan", 0)) + live_tup = _safe_int(row.get("n_live_tup", 0)) + size_mb = _safe_float(row.get("table_size_mb", 0)) + parts.append( + f"**Table: `{schema}.{table}`** — " + f"{seq_scans:,} seq scans, {seq_reads:,} rows read, " + f"{idx_scans:,} idx scans, {live_tup:,} live rows, " + f"{size_mb:.1f} MB" + ) + if seq_scans > 100 and live_tup > 10000: + action_idx += 1 + action_items.append( + f"{action_idx}. **[SEQ SCAN]** `{schema}.{table}` has " + f"{seq_scans:,} seq scans on {live_tup:,} rows. " + f"Add indexes on frequently filtered columns." + ) + parts.append("") + + # --- Row Contention & Locking --------------------------------------------- + contention_key = "row_contention" if is_oracle else "lock_waits" + rows = _get_rows(data, contention_key) + parts.append("## Row Contention & Locking") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + if is_oracle: + event = row.get("event", "?") + waits = _safe_int(row.get("total_waits", 0)) + waited_sec = _safe_float(row.get("time_waited_sec", 0)) + parts.append( + f"**Event: `{event}`** — {waits:,} waits, " + f"{waited_sec:.2f}s total wait time" + ) + if waited_sec > 1: + action_idx += 1 + action_items.append( + f"{action_idx}. **[CONTENTION]** `{event}` — " + f"{waited_sec:.2f}s total. Reduce hot-row updates, " + f"increase INITRANS, or tune locking strategy." 
+ ) + else: + pid = row.get("pid", "?") + user = row.get("usename", "?") + event = row.get("wait_event", "?") + event_type = row.get("wait_event_type", "") + running_sec = _safe_float(row.get("running_sec", 0)) + query = str(row.get("query") or "") + parts.append( + f"**PID {pid}** (user: {user}) — wait: {event_type}/{event}, " + f"running {running_sec:.2f}s" + ) + if query: + parts.append(f"```sql\n{_truncate_sql(query, 200)}\n```") + parts.append("") + + # --- Sequence Caching Issues ----------------------------------------------- + seq_key = "sequence_no_cache" if is_oracle else "sequence_cache_issues" + rows = _get_rows(data, seq_key) + parts.append("## Sequence Caching Issues") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + if is_oracle: + owner = row.get("sequence_owner", "") + name = row.get("sequence_name", "?") + cache = _safe_int(row.get("cache_size", 0)) + parts.append( + f"**`{owner}.{name}`** — cache_size={cache} (should be >= 20)" + ) + action_idx += 1 + action_items.append( + f"{action_idx}. **[SEQUENCE]** " + f"`ALTER SEQUENCE {owner}.{name} CACHE 20;`" + ) + else: + schema = row.get("schemaname", "public") + name = row.get("sequencename", "?") + cache = _safe_int(row.get("cache_size") or 0) + parts.append( + f"**`{schema}.{name}`** — cache_size={cache} (should be >= 20)" + ) + action_idx += 1 + action_items.append( + f"{action_idx}. 
**[SEQUENCE]** " + f"`ALTER SEQUENCE {schema}.{name} CACHE 20;`" + ) + parts.append("") + + # --- Stale Statistics / Vacuum / Bloat ------------------------------------ + if is_oracle: + rows = _get_rows(data, "stale_statistics") + else: + rows = _get_rows(data, "stale_stats_vacuum") + _get_rows(data, "bloat_estimate") + # Deduplicate by table name + seen_tables: set[str] = set() + deduped: list[dict[str, Any]] = [] + for r in rows: + key = f"{r.get('schemaname', '')}.{r.get('relname', '')}" + if key not in seen_tables: + seen_tables.add(key) + deduped.append(r) + rows = deduped + + parts.append("## Stale Statistics / Vacuum / Bloat") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + if is_oracle: + table = row.get("table_name", "?") + num_rows = _safe_int(row.get("num_rows", 0)) + stale = row.get("stale_stats", "?") + last_analyzed = row.get("last_analyzed", "never") + days = _safe_float(row.get("days_since_analyzed", 0)) + parts.append( + f"**`{table}`** — {num_rows:,} rows, stale={stale}, " + f"last analyzed: {last_analyzed} ({days:.0f} days ago)" + ) + action_idx += 1 + action_items.append( + f"{action_idx}. **[STALE STATS]** " + f"`EXEC DBMS_STATS.GATHER_TABLE_STATS" + f"(ownname=>USER, tabname=>'{table}');`" + ) + else: + schema = row.get("schemaname", "public") + table = row.get("relname", "?") + dead = _safe_int(row.get("n_dead_tup", 0)) + live = _safe_int(row.get("n_live_tup", 0)) + dead_pct = _safe_float(row.get("dead_pct", 0)) + last_vac = ( + row.get("last_autovacuum") or row.get("last_vacuum") or "never" + ) + last_analyze = ( + row.get("last_autoanalyze") or row.get("last_analyze") or "never" + ) + parts.append( + f"**`{schema}.{table}`** — {live:,} live, {dead:,} dead " + f"({dead_pct:.1f}% bloat), last vacuum: {last_vac}, " + f"last analyze: {last_analyze}" + ) + if dead_pct > 20 or dead > 50000: + action_idx += 1 + action_items.append( + f"{action_idx}. 
**[BLOAT]** `VACUUM ANALYZE {schema}.{table};` " + f"— {dead_pct:.1f}% dead tuples" + ) + elif str(last_analyze) == "never" or str(last_analyze) == "None": + action_idx += 1 + action_items.append( + f"{action_idx}. **[STALE STATS]** " + f"`ANALYZE {schema}.{table};` — never analyzed" + ) + parts.append("") + + # --- Unused Indexes ------------------------------------------------------- + rows = _get_rows(data, "unused_indexes") + parts.append("## Unused Indexes") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows: + schema = row.get("schemaname", "public") + table = row.get("relname", "?") + idx_name = row.get("indexrelname", "?") + size_mb = _safe_float(row.get("index_size_mb", 0)) + parts.append( + f"**`{schema}.{idx_name}`** on `{table}` — {size_mb:.1f} MB, 0 scans" + ) + if size_mb > 1: + action_idx += 1 + action_items.append( + f"{action_idx}. **[UNUSED INDEX]** " + f"`DROP INDEX {schema}.{idx_name};` — " + f"{size_mb:.1f} MB wasted" + ) + parts.append("") + + # --- Checkpoint / WAL Issues (PostgreSQL) --------------------------------- + if not is_oracle: + cp_rows = _get_rows(data, "checkpoint_stats") + parts.append("## Checkpoint / WAL Issues") + has_issue = False + if cp_rows: + row = cp_rows[0] + backend_pct = _safe_float(row.get("backend_write_pct", 0)) + req = _safe_int(row.get("checkpoints_req", 0)) + timed = _safe_int(row.get("checkpoints_timed", 0)) + parts.append( + f"Checkpoints: {timed} timed, {req} requested. " + f"Backend write %: {backend_pct:.1f}%" + ) + if backend_pct > 10: + has_issue = True + action_idx += 1 + action_items.append( + f"{action_idx}. **[CHECKPOINT]** Backend writes are " + f"{backend_pct:.1f}% of total — increase " + f"`shared_buffers` and `checkpoint_completion_target`." + ) + if req > timed and timed > 0: + has_issue = True + action_idx += 1 + action_items.append( + f"{action_idx}. 
**[CHECKPOINT]** More requested ({req}) than " + f"timed ({timed}) checkpoints — increase `max_wal_size`." + ) + if not has_issue: + parts.append("No issues found.") + parts.append("") + + # --- Wait Events (Oracle) ------------------------------------------------- + if is_oracle: + rows = _get_rows(data, "wait_events") + parts.append("## Top Wait Events") + if not rows: + parts.append("No issues found.\n") + else: + parts.append("") + for row in rows[:10]: + event = row.get("event", "?") + waits = _safe_int(row.get("total_waits", 0)) + waited = _safe_float(row.get("time_waited_sec", 0)) + parts.append(f"- **`{event}`** — {waits:,} waits, {waited:.2f}s") + parts.append("") + + # --- Temp File Usage (PostgreSQL) ----------------------------------------- + if not is_oracle: + rows = _get_rows(data, "temp_file_usage") + if rows: + parts.append("## Temp File Usage") + parts.append("") + for row in rows[:5]: + sid = row.get("queryid", "?") + temp_mb = _safe_float(row.get("temp_mb", 0)) + sql_text = str(row.get("query_text") or "") + parts.append(f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp usage") + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 200)}\n```") + if temp_mb > 100: + action_idx += 1 + action_items.append( + f"{action_idx}. **[TEMP FILES]** queryid `{sid}` uses " + f"{temp_mb:.1f} MB temp. Increase `work_mem` or optimize " + f"sort/join." 
+ ) + parts.append("") + + # --- Executive Summary & Action Plan -------------------------------------- + summary_parts: list[str] = [] + high_elapsed = _get_rows(data, "high_elapsed_per_exec") + high_exec = _get_rows(data, "high_execution_count") + fts = _get_rows(data, "full_table_scans" if is_oracle else "seq_scan_tables") + contention = _get_rows(data, "row_contention" if is_oracle else "lock_waits") + seqs = _get_rows( + data, "sequence_no_cache" if is_oracle else "sequence_cache_issues" + ) + + if high_elapsed: + summary_parts.append( + f"{len(high_elapsed)} queries with high elapsed time per execution" + ) + if high_exec: + summary_parts.append( + f"{len(high_exec)} queries with very high execution counts" + ) + if fts: + summary_parts.append( + f"{len(fts)} {'full table scans' if is_oracle else 'tables with heavy seq scans'}" + ) + if contention: + summary_parts.append(f"{len(contention)} contention/lock wait events") + if seqs: + summary_parts.append(f"{len(seqs)} sequences with no/low caching") + + exec_summary = ( + "Found: " + "; ".join(summary_parts) + "." + if summary_parts + else "No significant performance issues detected." 
def _get_rows(data: dict[str, Any], key: str) -> list[dict[str, Any]]:
    """Return the row list stored under *key* in the collected data.

    Args:
        data: Mapping of section name to whatever was collected for it.
        key: Section name to look up.

    Returns:
        The stored object itself when it is a ``list``; otherwise (key
        missing, or the stored value is not a list) a new empty list.
    """
    rows = data.get(key, [])
    # Anything that is not a real list is treated as "no rows".
    return rows if isinstance(rows, list) else []
+ ) + llm_summary = self.llm_client.generate(prompt=llm_prompt) except (ConnectionError, RuntimeError) as exc: - llm_response = f"LLM analysis failed: {exc}" + llm_summary = f"(LLM summary unavailable: {exc})" + + # Combine: programmatic findings + optional LLM summary + analysis = findings_report + if llm_summary: + analysis += f"\n\n---\n## LLM Summary\n{llm_summary}" + return { "raw_data": raw_data, "report_text": report_text, - "analysis": llm_response, + "analysis": analysis, } def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]: - full_prompt = report_text + _ANALYSIS_INSTRUCTION + # For uploaded reports, we still need the LLM since we don't + # have structured data — but we keep the prompt minimal. + llm_prompt = ( + report_text + "\n\n---\n" + "Summarise the key performance issues in the report above. " + "Only reference data that actually appears above. " + "Do NOT invent sql_ids, table names, or metrics." + ) try: - llm_response = self.llm_client.generate(prompt=full_prompt) + llm_response = self.llm_client.generate(prompt=llm_prompt) except (ConnectionError, RuntimeError) as exc: llm_response = f"LLM analysis failed: {exc}" return { diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py index 3f4cc52..22eb9de 100644 --- a/tools/pg-assistant/snapshot_compare.py +++ b/tools/pg-assistant/snapshot_compare.py @@ -281,11 +281,19 @@ def _build_comparison( # Build delta summary table delta_table = self._build_delta_table(data_a, data_b, label_a, label_b) - # LLM comparison summary + # Programmatic comparison — Python code identifies all changes. + findings = self._build_programmatic_comparison( + data_a, data_b, label_a, label_b, delta_table + ) + + # Optional LLM summary appended after the real findings. 
comparison_text = self._format_comparison_text( data_a, data_b, label_a, label_b, delta_table ) - analysis = self._get_llm_comparison(comparison_text) + llm_summary = self._get_llm_comparison(comparison_text) + analysis = findings + if llm_summary: + analysis += f"\n\n---\n## LLM Summary\n{llm_summary}" return { "figures": figures, @@ -855,28 +863,144 @@ def _format_comparison_text( return "\n".join(parts) def _get_llm_comparison(self, text: str) -> str: - # Append instructions AFTER the data so codellama "completes" a - # real analysis rather than hallucinating from a system prompt. - instruction = ( - "\n\n" + "=" * 60 + "\n" - "TASK: Compare the two snapshots above. Write a report that ONLY " - "references sql_ids, queryids, table names, and SQL text shown above. " - "Do NOT invent any IDs, table names, or queries.\n\n" - "## Executive Summary\n" - "What changed between Snapshot A and Snapshot B?\n\n" - "## Key Metric Changes\n" - "List metrics from the DELTA SUMMARY above that changed >10%.\n\n" - "## New or Regressed SQL\n" - "SQL that appeared or got worse in Snapshot B. Copy query_text.\n\n" - "## Wait Event Changes\n" - "Wait events that increased or decreased between snapshots.\n\n" - "## Recommendations\n" - "Numbered action plan using ONLY data from above.\n" - ) + # Build programmatic comparison findings first, then ask LLM + # for a brief summary only. try: - return self.llm.generate(prompt=text + instruction) + llm_prompt = ( + text + "\n\n---\n" + "Based on the snapshot comparison data above, write 3-5 sentences " + "summarising what changed and what the DBA should investigate. " + "Do NOT invent any sql_ids, table names, or metrics." 
def _build_programmatic_comparison(
    self,
    data_a: dict[str, Any],
    data_b: dict[str, Any],
    label_a: str,
    label_b: str,
    delta_table: list[dict[str, Any]],
) -> str:
    """Build a Markdown comparison report purely from the snapshot data.

    The report has four sections: key metric changes (every delta-table
    row with a non-zero delta), new or regressed SQL (statements only in
    snapshot B, or whose elapsed time grew by more than 20%), wait event
    changes (absolute change above 1s) and a numbered recommendations
    list (regressed SQL, plus wait events that grew by more than 10s).

    Args:
        data_a: Collected data of the baseline snapshot; the ``top_sql``
            and ``wait_events`` entries are read.
        data_b: Collected data of the snapshot being compared.
        label_a: Display label of snapshot A; also the delta-table key
            holding A's value.
        label_b: Display label of snapshot B; also the delta-table key
            holding B's value.
        delta_table: Rows with ``metric``, ``delta`` and ``change_pct``
            plus one value per label.

    Returns:
        The report as a single newline-joined Markdown string.
    """

    def _num(value: Any) -> float:
        # NULL / non-numeric cells must not abort the whole report, so
        # anything that cannot be parsed counts as 0.0.  The str() retry
        # keeps accepting values that only have a numeric string form.
        for candidate in (value, str(value)):
            try:
                return float(candidate)
            except (TypeError, ValueError):
                continue
        return 0.0

    def _sql_by_id(snapshot: dict[str, Any]) -> dict[str, dict[str, Any]]:
        # Index top SQL by identifier.  A missing/None identifier maps to
        # "" (skipped below) instead of the bogus literal "None".
        indexed: dict[str, dict[str, Any]] = {}
        for entry in snapshot.get("top_sql") or []:
            ident = entry.get("sql_id") or entry.get("queryid")
            indexed["" if ident is None else str(ident)] = entry
        return indexed

    def _waits_by_event(snapshot: dict[str, Any]) -> dict[str, float]:
        return {
            str(entry.get("event", "")): _num(entry.get("time_waited_sec", 0))
            for entry in snapshot.get("wait_events") or []
        }

    parts: list[str] = []
    action_items: list[str] = []
    action_idx = 0

    parts.append("# Snapshot Comparison Analysis")
    parts.append(f"**{label_a}** vs **{label_b}**\n")

    # --- Key Metric Changes -----------------------------------------------
    parts.append("## Key Metric Changes")
    parts.append("")
    significant = [
        r for r in (delta_table or []) if abs(_num(r.get("delta", 0))) > 0
    ]
    if not significant:
        parts.append("No significant metric changes.\n")
    else:
        for row in significant:
            metric = row.get("metric", "?")
            val_a = row.get(label_a, 0)
            val_b = row.get(label_b, 0)
            delta = row.get("delta", 0)
            pct = row.get("change_pct", "0%")
            parts.append(
                f"- **{metric}**: {val_a} → {val_b} (delta: {delta}, {pct})"
            )
        parts.append("")

    # --- Regressed / New SQL -----------------------------------------------
    parts.append("## New or Regressed SQL")
    sql_a = _sql_by_id(data_a)
    sql_b = _sql_by_id(data_b)
    id_key = "sql_id" if self.is_oracle else "queryid"
    regressed: list[str] = []
    for sid, row_b in sql_b.items():
        if not sid:
            continue
        elapsed_b = _num(row_b.get("elapsed_sec", 0))
        sql_text = str(row_b.get("sql_text") or row_b.get("query_text") or "")
        if sid in sql_a:
            elapsed_a = _num(sql_a[sid].get("elapsed_sec", 0))
            # Only a >20% increase over a non-zero baseline is a regression.
            if elapsed_a > 0 and elapsed_b > elapsed_a * 1.2:
                pct_change = ((elapsed_b - elapsed_a) / elapsed_a) * 100
                parts.append(
                    f"**{id_key}: `{sid}`** — elapsed "
                    f"{elapsed_a:.2f}s → {elapsed_b:.2f}s "
                    f"(+{pct_change:.0f}%)"
                )
                if sql_text:
                    parts.append(f"```sql\n{sql_text[:200]}\n```")
                regressed.append(sid)
                action_idx += 1
                action_items.append(
                    f"{action_idx}. **[REGRESSED]** `{sid}` elapsed time "
                    f"increased {pct_change:.0f}%. Investigate plan change."
                )
        else:
            parts.append(
                f"**{id_key}: `{sid}`** — NEW in snapshot B, "
                f"elapsed {elapsed_b:.2f}s"
            )
            if sql_text:
                parts.append(f"```sql\n{sql_text[:200]}\n```")
            regressed.append(sid)

    if not regressed:
        parts.append("No new or regressed SQL detected.\n")
    parts.append("")

    # --- Wait Event Changes ------------------------------------------------
    parts.append("## Wait Event Changes")
    waits_a = _waits_by_event(data_a)
    waits_b = _waits_by_event(data_b)
    wait_changes: list[str] = []
    for evt in sorted(waits_a.keys() | waits_b.keys()):
        wa = waits_a.get(evt, 0)
        wb = waits_b.get(evt, 0)
        if wa == 0 and wb == 0:
            continue
        delta = wb - wa
        if abs(delta) > 1:
            direction = "↑" if delta > 0 else "↓"
            parts.append(
                f"- **`{evt}`**: {wa:.2f}s → {wb:.2f}s "
                f"({direction}{abs(delta):.2f}s)"
            )
            wait_changes.append(evt)
            if delta > 10:
                action_idx += 1
                action_items.append(
                    f"{action_idx}. **[WAIT EVENT]** `{evt}` increased "
                    f"by {delta:.2f}s. Investigate root cause."
                )

    if not wait_changes:
        parts.append("No significant wait event changes.\n")
    parts.append("")

    # --- Action Plan -------------------------------------------------------
    parts.append("## Recommendations")
    if action_items:
        parts.extend(action_items)
    else:
        parts.append("No significant regressions detected between snapshots.")

    return "\n".join(parts)
_render_comparison(result: dict) -> None: st.markdown(f"**{title}**") st.plotly_chart(fig, use_container_width=True) - # LLM analysis + # Comparison analysis analysis = result.get("analysis", "") if analysis: - st.markdown("### AI Comparison Analysis") + st.markdown("### Comparison Analysis") st.markdown(analysis) @@ -864,7 +864,7 @@ def _render_comparison(result: dict) -> None: st.divider() if last.get("analysis"): - st.subheader("AI Analysis & Action Plan") + st.subheader("Performance Analysis Report") st.markdown(last["analysis"]) raw = last.get("raw_data", {}) diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index db583f2..d3f891f 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -843,12 +843,334 @@ def _build_findings_report(data: dict[str, Any]) -> str: parts.append(f"# Performance Analysis Report ({db_type.upper()})") parts.append("") + # ===================================================================== + # DATABASE-LEVEL OVERVIEW + # ===================================================================== + + # --- Database Stats (PostgreSQL) ------------------------------------------ + if not is_oracle: + db_rows = _get_rows(data, "database_stats") + if db_rows: + row = db_rows[0] + cache_hit = _safe_float(row.get("cache_hit_pct", 0)) + commits = _safe_int(row.get("xact_commit", 0)) + rollbacks = _safe_int(row.get("xact_rollback", 0)) + backends = _safe_int(row.get("numbackends", 0)) + blks_read = _safe_int(row.get("blks_read", 0)) + blks_hit = _safe_int(row.get("blks_hit", 0)) + temp_bytes = _safe_int(row.get("temp_bytes", 0)) + temp_files = _safe_int(row.get("temp_files", 0)) + parts.append("## Database Overview") + parts.append( + f"- **Cache hit ratio:** {cache_hit:.2f}%\n" + f"- **Active backends:** {backends}\n" + f"- **Transactions:** {commits:,} commits, {rollbacks:,} rollbacks\n" + f"- **Blocks:** {blks_hit:,} hit, {blks_read:,} read from disk\n" + f"- **Temp usage:** 
{temp_files:,} files, " + f"{temp_bytes / 1048576:.1f} MB" + ) + if cache_hit < 95 and blks_read > 0: + action_idx += 1 + action_items.append( + f"{action_idx}. **[CACHE]** Cache hit ratio is {cache_hit:.2f}% " + f"(target > 99%). Increase `shared_buffers`." + ) + if rollbacks > 0 and commits > 0: + rb_pct = rollbacks / (commits + rollbacks) * 100 + if rb_pct > 5: + action_idx += 1 + action_items.append( + f"{action_idx}. **[ROLLBACKS]** {rb_pct:.1f}% rollback rate " + f"({rollbacks:,}/{commits + rollbacks:,}). " + f"Investigate application error handling." + ) + parts.append("") + + # --- Connection Stats (PostgreSQL) ---------------------------------------- + if not is_oracle: + conn_rows = _get_rows(data, "connection_stats") + if conn_rows: + parts.append("## Connection Stats") + for row in conn_rows: + state = row.get("state", "unknown") or "null" + count = _safe_int(row.get("count", 0)) + wtype = row.get("wait_event_type", "None") + parts.append(f"- **{state}**: {count} connections (wait: {wtype})") + idle_count = sum( + _safe_int(r.get("count", 0)) + for r in conn_rows + if (r.get("state") or "").startswith("idle") + ) + if idle_count > 50: + action_idx += 1 + action_items.append( + f"{action_idx}. **[CONNECTIONS]** {idle_count} idle connections. " + f"Use connection pooling (PgBouncer)." 
+ ) + parts.append("") + + # --- Oracle System Stats -------------------------------------------------- + if is_oracle: + sys_rows = _get_rows(data, "system_stats") or _get_rows( + data, "awr_system_stats" + ) + if sys_rows: + parts.append("## System Statistics") + stats_map: dict[str, int] = {} + for row in sys_rows: + name = str(row.get("name", "")) + val = _safe_int(row.get("value", 0)) + stats_map[name] = val + parts.append(f"- **{name}:** {val:,}") + # Cache hit ratio + db_gets = stats_map.get("db block gets", 0) + consistent = stats_map.get("consistent gets", 0) + phys_reads = stats_map.get("physical reads", 0) + logical = db_gets + consistent + if logical > 0: + hit_pct = (1 - phys_reads / logical) * 100 + parts.append(f"\n**Buffer cache hit ratio: {hit_pct:.2f}%**") + if hit_pct < 95: + action_idx += 1 + action_items.append( + f"{action_idx}. **[CACHE]** Buffer cache hit ratio is " + f"{hit_pct:.2f}% (target > 99%). " + f"Increase `db_cache_size`." + ) + hard_parse = stats_map.get("parse count (hard)", 0) + total_parse = stats_map.get("parse count (total)", 0) + if total_parse > 0: + hard_pct = hard_parse / total_parse * 100 + if hard_pct > 30: + action_idx += 1 + action_items.append( + f"{action_idx}. **[PARSING]** Hard parse ratio is " + f"{hard_pct:.1f}%. Use bind variables." + ) + sorts_disk = stats_map.get("sorts (disk)", 0) + sorts_mem = stats_map.get("sorts (memory)", 0) + if sorts_disk > 0 and sorts_mem > 0: + disk_pct = sorts_disk / (sorts_mem + sorts_disk) * 100 + if disk_pct > 5: + action_idx += 1 + action_items.append( + f"{action_idx}. **[SORTS]** {disk_pct:.1f}% sorts on disk " + f"({sorts_disk:,}/{sorts_mem + sorts_disk:,}). " + f"Increase `sort_area_size` / `PGA_AGGREGATE_TARGET`." 
+ ) + parts.append("") + + # --- Oracle SGA Info ------------------------------------------------------ + if is_oracle: + sga_rows = _get_rows(data, "sga_info") + if sga_rows: + parts.append("## SGA Configuration") + for row in sga_rows: + name = row.get("name", "?") + size_mb = _safe_float(row.get("size_mb", 0)) + parts.append(f"- **{name}:** {size_mb:.0f} MB") + parts.append("") + + # --- Oracle Tablespace I/O ------------------------------------------------ + if is_oracle: + ts_rows = _get_rows(data, "tablespace_io") + if ts_rows: + parts.append("## Tablespace I/O") + for row in ts_rows: + ts_name = row.get("tablespace_name", "?") + reads = _safe_int(row.get("physical_reads", 0)) + writes = _safe_int(row.get("physical_writes", 0)) + read_sec = _safe_float(row.get("read_time_sec", 0)) + write_sec = _safe_float(row.get("write_time_sec", 0)) + parts.append( + f"- **`{ts_name}`** — reads: {reads:,} ({read_sec:.2f}s), " + f"writes: {writes:,} ({write_sec:.2f}s)" + ) + if read_sec > 10: + action_idx += 1 + action_items.append( + f"{action_idx}. **[I/O]** Tablespace `{ts_name}` has " + f"{read_sec:.2f}s read time. Move to faster storage or " + f"redistribute I/O." + ) + parts.append("") + + # --- Oracle Redo Log Switches --------------------------------------------- + if is_oracle: + redo_rows = _get_rows(data, "redo_log_switches") + if redo_rows: + parts.append("## Redo Log Switches (Last 24h)") + for row in redo_rows: + hour = row.get("switch_hour", "?") + switches = _safe_int(row.get("switches", 0)) + parts.append(f"- **{hour}:** {switches} switches") + if switches > 10: + action_idx += 1 + action_items.append( + f"{action_idx}. **[REDO]** {switches} log switches in hour " + f"{hour}. Increase redo log file size." 
+ ) + parts.append("") + + # --- Oracle Temp Usage ---------------------------------------------------- + if is_oracle: + temp_rows = _get_rows(data, "temp_usage") + if temp_rows: + parts.append("## Temp Tablespace Usage") + for row in temp_rows: + ts_name = row.get("tablespace_name", "?") + used_mb = _safe_float(row.get("used_mb", 0)) + free_mb = _safe_float(row.get("free_mb", 0)) + pct = _safe_float(row.get("pct_used", 0)) + parts.append( + f"- **`{ts_name}`** — {used_mb:.0f} MB used, " + f"{free_mb:.0f} MB free ({pct:.1f}% used)" + ) + if pct > 80: + action_idx += 1 + action_items.append( + f"{action_idx}. **[TEMP]** `{ts_name}` is {pct:.1f}% full. " + f"Add temp datafile or resize." + ) + parts.append("") + + # ===================================================================== + # TOP SQL BY RESOURCE CONSUMPTION + # ===================================================================== + + # --- Top CPU SQL (always show — this is the most important section) ------- + cpu_key = "top_cpu_sql" if is_oracle else "top_cpu_queries" + cpu_rows = _get_rows(data, cpu_key) + # Also check AWR top SQL / pgProfile top SQL as alternatives + if not cpu_rows: + cpu_rows = _get_rows(data, "awr_top_sql") + if not cpu_rows: + cpu_rows = _get_rows(data, "pgprofile_top_sql") + if cpu_rows: + parts.append("## Top SQL by CPU / Elapsed Time") + parts.append("") + for i, row in enumerate(cpu_rows[:15]): + sid = row.get("sql_id") or row.get("queryid") or "?" + id_label = "sql_id" if is_oracle else "queryid" + if is_oracle: + cpu_sec = _safe_float(row.get("cpu_sec", 0)) + elapsed_sec = _safe_float(row.get("elapsed_sec", 0)) + execs = _safe_int(row.get("executions", 0)) + gets = _safe_int(row.get("buffer_gets", 0)) + gets_per = _safe_int(row.get("gets_per_exec", 0)) + sql_text = str(row.get("sql_text") or "") + parts.append( + f"**{i + 1}. 
{id_label}: `{sid}`** — " + f"CPU: {cpu_sec:.2f}s, elapsed: {elapsed_sec:.2f}s, " + f"{execs:,} executions, buffer gets: {gets:,} " + f"({gets_per:,}/exec)" + ) + else: + total_sec = _safe_float( + row.get("total_exec_sec") or row.get("total_exec_time", 0) + ) + mean_sec = _safe_float( + row.get("mean_exec_sec") or row.get("mean_exec_time", 0) + ) + calls = _safe_int(row.get("calls", 0)) + cache_hit = _safe_float(row.get("cache_hit_pct", 100)) + blk_read = _safe_int(row.get("shared_blks_read", 0)) + blk_hit = _safe_int(row.get("shared_blks_hit", 0)) + parts.append( + f"**{i + 1}. {id_label}: `{sid}`** — " + f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, " + f"{calls:,} calls, cache hit: {cache_hit:.1f}%, " + f"blks read: {blk_read:,}, blks hit: {blk_hit:,}" + ) + sql_text = str(row.get("sql_text") or row.get("query_text") or "") + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") + # Generate action items for top offenders + if i < 5: + if is_oracle: + if gets_per > 10000: + action_idx += 1 + action_items.append( + f"{action_idx}. **[TOP CPU]** `{sid}` — " + f"{gets_per:,} buffer gets/exec. " + f"Review execution plan: " + f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR" + f"('{sid}'));`" + ) + else: + if cache_hit < 90 and blk_read > 1000: + action_idx += 1 + action_items.append( + f"{action_idx}. **[LOW CACHE HIT]** queryid `{sid}` — " + f"cache hit {cache_hit:.1f}%, {blk_read:,} blocks read. " + f"Add indexes or increase `shared_buffers`." + ) + if mean_sec > 1.0: + action_idx += 1 + action_items.append( + f"{action_idx}. **[SLOW QUERY]** queryid `{sid}` — " + f"avg {mean_sec:.4f}s/call. Run " + f"`EXPLAIN (ANALYZE, BUFFERS) ` to investigate." 
+ ) + parts.append("") + + # --- Top Queries by Total Elapsed (fallback if different from CPU) -------- + elapsed_key = "top_elapsed_sql" if is_oracle else "top_queries" + elapsed_rows = _get_rows(data, elapsed_key) + # Only show if we have data AND it's different from cpu_rows + if elapsed_rows and elapsed_key != cpu_key: + # Check if these are substantially different from the CPU rows + cpu_ids = ( + {str(r.get("sql_id") or r.get("queryid") or "") for r in cpu_rows[:10]} + if cpu_rows + else set() + ) + new_rows = [ + r + for r in elapsed_rows + if str(r.get("sql_id") or r.get("queryid") or "") not in cpu_ids + ] + if new_rows: + parts.append("## Additional Top SQL by Total Elapsed Time") + parts.append("*(Not already listed in Top CPU section)*\n") + for i, row in enumerate(new_rows[:10]): + sid = row.get("sql_id") or row.get("queryid") or "?" + id_label = "sql_id" if is_oracle else "queryid" + if is_oracle: + elapsed_sec = _safe_float(row.get("elapsed_sec", 0)) + execs = _safe_int(row.get("executions", 0)) + gets = _safe_int(row.get("buffer_gets", 0)) + sql_text = str(row.get("sql_text") or "") + parts.append( + f"**{i + 1}. {id_label}: `{sid}`** — " + f"elapsed: {elapsed_sec:.2f}s, {execs:,} execs, " + f"buffer gets: {gets:,}" + ) + else: + total_sec = _safe_float(row.get("total_exec_sec", 0)) + mean_sec = _safe_float(row.get("mean_exec_sec", 0)) + calls = _safe_int(row.get("calls", 0)) + cache_hit = _safe_float(row.get("cache_hit_pct", 100)) + parts.append( + f"**{i + 1}. 
{id_label}: `{sid}`** — " + f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, " + f"{calls:,} calls, cache hit: {cache_hit:.1f}%" + ) + sql_text = str(row.get("sql_text") or row.get("query_text") or "") + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") + parts.append("") + + # ===================================================================== + # THRESHOLD-BASED FINDINGS + # ===================================================================== + # --- High Elapsed Time SQL ------------------------------------------------ section_key = "high_elapsed_per_exec" rows = _get_rows(data, section_key) - parts.append("## High Elapsed Time SQL") + parts.append("## High Elapsed Time per Execution (> 1s avg)") if not rows: - parts.append("No issues found.\n") + parts.append("No queries exceed the 1s/exec threshold.\n") else: parts.append("") for row in rows: @@ -860,27 +1182,37 @@ def _build_findings_report(data: dict[str, Any]) -> str: execs = _safe_int(row.get("executions") or row.get("calls", 0)) sql_text = str(row.get("sql_text") or row.get("query_text") or "") gets = _safe_int(row.get("buffer_gets") or row.get("shared_blks_read", 0)) + id_label = "sql_id" if is_oracle else "queryid" parts.append( - f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — " - f"avg {avg_elapsed:.4f}s/exec, {execs} executions, " - f"total {total_elapsed:.2f}s, buffer gets/reads: {gets}" + f"**{id_label}: `{sid}`** — " + f"avg {avg_elapsed:.4f}s/exec, {execs:,} executions, " + f"total {total_elapsed:.2f}s, buffer gets/reads: {gets:,}" ) if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") action_idx += 1 - action_items.append( - f"{action_idx}. **[HIGH ELAPSED]** Investigate `{sid}` " - f"(avg {avg_elapsed:.4f}s/exec). Consider adding indexes on " - f"columns used in WHERE/JOIN clauses." - ) + if is_oracle: + action_items.append( + f"{action_idx}. 
**[HIGH ELAPSED]** `{sid}` " + f"(avg {avg_elapsed:.4f}s/exec). Check plan: " + f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR" + f"('{sid}'));` — add indexes on WHERE/JOIN columns." + ) + else: + action_items.append( + f"{action_idx}. **[HIGH ELAPSED]** queryid `{sid}` " + f"(avg {avg_elapsed:.4f}s/exec). Run " + f"`EXPLAIN (ANALYZE, BUFFERS)` on this query and " + f"add indexes on filtered columns." + ) parts.append("") # --- High Execution Count SQL --------------------------------------------- section_key = "high_execution_count" rows = _get_rows(data, section_key) - parts.append("## High Execution Count SQL") + parts.append("## High Execution Count SQL (> 1000 calls)") if not rows: - parts.append("No issues found.\n") + parts.append("No queries exceed the 1000 execution threshold.\n") else: parts.append("") for row in rows: @@ -890,24 +1222,26 @@ def _build_findings_report(data: dict[str, Any]) -> str: row.get("total_elapsed_sec") or row.get("total_exec_sec", 0) ) sql_text = str(row.get("sql_text") or row.get("query_text") or "") + id_label = "sql_id" if is_oracle else "queryid" parts.append( - f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — " + f"**{id_label}: `{sid}`** — " f"{execs:,} executions, total {total_elapsed:.2f}s" ) if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") if execs > 100000: action_idx += 1 action_items.append( f"{action_idx}. **[HIGH EXEC COUNT]** `{sid}` executed " - f"{execs:,} times. Consider caching results or batching." + f"{execs:,} times. Consider caching results, batching, " + f"or reducing call frequency." 
) parts.append("") # --- Full Table Scans ----------------------------------------------------- fts_key = "full_table_scans" if is_oracle else "seq_scan_tables" rows = _get_rows(data, fts_key) - parts.append("## Full Table Scans") + parts.append("## Full Table Scans / Sequential Scans") if not rows: parts.append("No issues found.\n") else: @@ -918,29 +1252,35 @@ def _build_findings_report(data: dict[str, Any]) -> str: owner = row.get("object_owner", "") sid = row.get("sql_id", "?") execs = _safe_int(row.get("executions", 0)) + elapsed = _safe_float(row.get("elapsed_sec", 0)) + gets = _safe_int(row.get("buffer_gets", 0)) sql_text = str(row.get("sql_text") or "") parts.append( f"**Table: `{owner}.{table}`** — sql_id: `{sid}`, " - f"{execs} executions" + f"{execs:,} execs, {elapsed:.2f}s elapsed, " + f"{gets:,} buffer gets" ) if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") action_idx += 1 action_items.append( f"{action_idx}. **[FULL TABLE SCAN]** `{owner}.{table}` " - f"via sql_id `{sid}`. Review query and add appropriate index." + f"via sql_id `{sid}`. Add index on columns in WHERE clause " + f"or use hints to force index access." 
) else: table = row.get("relname", "?") schema = row.get("schemaname", "public") seq_scans = _safe_int(row.get("seq_scan", 0)) - seq_reads = _safe_int(row.get("seq_tup_read", 0)) + seq_tup_read = _safe_int(row.get("seq_tup_read", 0)) idx_scans = _safe_int(row.get("idx_scan", 0)) live_tup = _safe_int(row.get("n_live_tup", 0)) size_mb = _safe_float(row.get("table_size_mb", 0)) + avg_rows = _safe_int(row.get("avg_rows_per_seq_scan", 0)) parts.append( f"**Table: `{schema}.{table}`** — " - f"{seq_scans:,} seq scans, {seq_reads:,} rows read, " + f"{seq_scans:,} seq scans ({avg_rows:,} rows/scan avg, " + f"{seq_tup_read:,} rows read), " f"{idx_scans:,} idx scans, {live_tup:,} live rows, " f"{size_mb:.1f} MB" ) @@ -948,17 +1288,78 @@ def _build_findings_report(data: dict[str, Any]) -> str: action_idx += 1 action_items.append( f"{action_idx}. **[SEQ SCAN]** `{schema}.{table}` has " - f"{seq_scans:,} seq scans on {live_tup:,} rows. " - f"Add indexes on frequently filtered columns." + f"{seq_scans:,} seq scans on {live_tup:,} rows " + f"({size_mb:.1f} MB). 
Add indexes on frequently " + f"filtered columns: " + f"`CREATE INDEX ON {schema}.{table} (column_name);`" ) parts.append("") + # --- Execution Plans (Oracle) --------------------------------------------- + if is_oracle: + plans = data.get("execution_plans", []) + if isinstance(plans, list) and plans: + parts.append("## Execution Plans (Top SQL)") + parts.append("") + for plan in plans[:5]: + sid = plan.get("sql_id", "?") + steps = plan.get("steps", []) + parts.append(f"### Plan for sql_id: `{sid}`") + has_full_scan = False + has_hash_join = False + for step in steps[:20]: + op = str(step.get("operation", "")) + obj = step.get("object_name", "") + cost = step.get("cost", "") + est = step.get("est_rows", "") + line = f"- {op}" + if obj: + line += f" on `{obj}`" + if cost: + line += f" (cost={cost}, rows={est})" + parts.append(line) + if "FULL" in op.upper(): + has_full_scan = True + if "HASH JOIN" in op.upper(): + has_hash_join = True + if has_full_scan: + action_idx += 1 + action_items.append( + f"{action_idx}. **[PLAN]** sql_id `{sid}` has TABLE ACCESS " + f"FULL in plan. Add appropriate index." + ) + if has_hash_join: + action_idx += 1 + action_items.append( + f"{action_idx}. **[PLAN]** sql_id `{sid}` uses HASH JOIN. " + f"Ensure join columns are indexed for NESTED LOOPS " + f"if table is small." 
+ ) + parts.append("") + + # --- Oracle Parallel Queries ---------------------------------------------- + if is_oracle: + px_rows = _get_rows(data, "parallel_queries") + if px_rows: + parts.append("## Parallel Queries") + for row in px_rows: + sid = row.get("sql_id", "?") + px = _safe_int(row.get("px_servers", 0)) + elapsed = _safe_float(row.get("elapsed_sec", 0)) + sql_text = str(row.get("sql_text") or "") + parts.append( + f"- **sql_id: `{sid}`** — {px:,} PX servers, {elapsed:.2f}s elapsed" + ) + if sql_text: + parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") + parts.append("") + # --- Row Contention & Locking --------------------------------------------- contention_key = "row_contention" if is_oracle else "lock_waits" rows = _get_rows(data, contention_key) parts.append("## Row Contention & Locking") if not rows: - parts.append("No issues found.\n") + parts.append("No active contention detected.\n") else: parts.append("") for row in rows: @@ -966,9 +1367,10 @@ def _build_findings_report(data: dict[str, Any]) -> str: event = row.get("event", "?") waits = _safe_int(row.get("total_waits", 0)) waited_sec = _safe_float(row.get("time_waited_sec", 0)) + avg_wait = _safe_float(row.get("avg_wait_sec", 0)) parts.append( f"**Event: `{event}`** — {waits:,} waits, " - f"{waited_sec:.2f}s total wait time" + f"{waited_sec:.2f}s total, avg {avg_wait:.4f}s/wait" ) if waited_sec > 1: action_idx += 1 @@ -983,15 +1385,60 @@ def _build_findings_report(data: dict[str, Any]) -> str: event = row.get("wait_event", "?") event_type = row.get("wait_event_type", "") running_sec = _safe_float(row.get("running_sec", 0)) + state = row.get("state", "") query = str(row.get("query") or "") parts.append( - f"**PID {pid}** (user: {user}) — wait: {event_type}/{event}, " - f"running {running_sec:.2f}s" + f"**PID {pid}** (user: {user}, state: {state}) — " + f"wait: {event_type}/{event}, running {running_sec:.2f}s" ) if query: - parts.append(f"```sql\n{_truncate_sql(query, 200)}\n```") + 
parts.append(f"```sql\n{_truncate_sql(query, 300)}\n```") + if running_sec > 60: + action_idx += 1 + action_items.append( + f"{action_idx}. **[LONG WAIT]** PID {pid} waiting on " + f"{event_type}/{event} for {running_sec:.0f}s. " + f"Consider `SELECT pg_cancel_backend({pid});`" + ) parts.append("") + # --- Wait Events (Oracle / AWR) ------------------------------------------- + if is_oracle: + wait_rows = _get_rows(data, "wait_events") or _get_rows(data, "awr_wait_events") + if wait_rows: + parts.append("## Top Wait Events") + parts.append("") + for row in wait_rows[:15]: + event = row.get("event", "?") + waits = _safe_int(row.get("total_waits", 0)) + waited = _safe_float(row.get("time_waited_sec", 0)) + avg_w = _safe_float(row.get("avg_wait_sec", 0)) + line = f"- **`{event}`** — {waits:,} waits, {waited:.2f}s total" + if avg_w > 0: + line += f", avg {avg_w:.4f}s" + parts.append(line) + if waited > 60: + action_idx += 1 + action_items.append( + f"{action_idx}. **[WAIT]** `{event}` — " + f"{waited:.2f}s total wait time. " + f"Investigate root cause (I/O, lock, latch)." 
+ ) + parts.append("") + + # --- pgProfile Wait Events ------------------------------------------------ + if not is_oracle: + pgp_wait_rows = _get_rows(data, "pgprofile_wait_events") + if pgp_wait_rows: + parts.append("## Wait Events (pgProfile)") + for row in pgp_wait_rows[:15]: + etype = row.get("event_type", "?") + event = row.get("event", "?") + waits = _safe_int(row.get("total_waits", 0)) + waited = _safe_float(row.get("total_waited_sec", 0)) + parts.append(f"- **{etype}/{event}** — {waits:,} waits, {waited:.2f}s") + parts.append("") + # --- Sequence Caching Issues ----------------------------------------------- seq_key = "sequence_no_cache" if is_oracle else "sequence_cache_issues" rows = _get_rows(data, seq_key) @@ -1032,7 +1479,6 @@ def _build_findings_report(data: dict[str, Any]) -> str: rows = _get_rows(data, "stale_statistics") else: rows = _get_rows(data, "stale_stats_vacuum") + _get_rows(data, "bloat_estimate") - # Deduplicate by table name seen_tables: set[str] = set() deduped: list[dict[str, Any]] = [] for r in rows: @@ -1084,10 +1530,11 @@ def _build_findings_report(data: dict[str, Any]) -> str: if dead_pct > 20 or dead > 50000: action_idx += 1 action_items.append( - f"{action_idx}. **[BLOAT]** `VACUUM ANALYZE {schema}.{table};` " + f"{action_idx}. **[BLOAT]** " + f"`VACUUM ANALYZE {schema}.{table};` " f"— {dead_pct:.1f}% dead tuples" ) - elif str(last_analyze) == "never" or str(last_analyze) == "None": + elif str(last_analyze) in ("never", "None"): action_idx += 1 action_items.append( f"{action_idx}. 
**[STALE STATS]** " @@ -1097,10 +1544,8 @@ def _build_findings_report(data: dict[str, Any]) -> str: # --- Unused Indexes ------------------------------------------------------- rows = _get_rows(data, "unused_indexes") - parts.append("## Unused Indexes") - if not rows: - parts.append("No issues found.\n") - else: + if rows: + parts.append("## Unused Indexes") parts.append("") for row in rows: schema = row.get("schemaname", "public") @@ -1119,6 +1564,30 @@ def _build_findings_report(data: dict[str, Any]) -> str: ) parts.append("") + # --- Table Stats (PostgreSQL) — top tables by activity -------------------- + if not is_oracle: + tbl_rows = _get_rows(data, "table_stats") + if tbl_rows: + parts.append("## Top Tables by Activity") + parts.append("") + for row in tbl_rows[:10]: + schema = row.get("schemaname", "public") + table = row.get("relname", "?") + seq_scan = _safe_int(row.get("seq_scan", 0)) + idx_scan = _safe_int(row.get("idx_scan", 0)) + inserts = _safe_int(row.get("n_tup_ins", 0)) + updates = _safe_int(row.get("n_tup_upd", 0)) + deletes = _safe_int(row.get("n_tup_del", 0)) + live = _safe_int(row.get("n_live_tup", 0)) + dead = _safe_int(row.get("n_dead_tup", 0)) + parts.append( + f"- **`{schema}.{table}`** — seq: {seq_scan:,}, " + f"idx: {idx_scan:,}, ins/upd/del: " + f"{inserts:,}/{updates:,}/{deletes:,}, " + f"live: {live:,}, dead: {dead:,}" + ) + parts.append("") + # --- Checkpoint / WAL Issues (PostgreSQL) --------------------------------- if not is_oracle: cp_rows = _get_rows(data, "checkpoint_stats") @@ -1129,9 +1598,14 @@ def _build_findings_report(data: dict[str, Any]) -> str: backend_pct = _safe_float(row.get("backend_write_pct", 0)) req = _safe_int(row.get("checkpoints_req", 0)) timed = _safe_int(row.get("checkpoints_timed", 0)) + buf_cp = _safe_int(row.get("buffers_checkpoint", 0)) + buf_clean = _safe_int(row.get("buffers_clean", 0)) + buf_backend = _safe_int(row.get("buffers_backend", 0)) parts.append( - f"Checkpoints: {timed} timed, {req} 
requested. " - f"Backend write %: {backend_pct:.1f}%" + f"- Checkpoints: {timed:,} timed, {req:,} requested\n" + f"- Buffers: checkpoint={buf_cp:,}, clean={buf_clean:,}, " + f"backend={buf_backend:,}\n" + f"- Backend write %: {backend_pct:.1f}%" ) if backend_pct > 10: has_issue = True @@ -1145,51 +1619,42 @@ def _build_findings_report(data: dict[str, Any]) -> str: has_issue = True action_idx += 1 action_items.append( - f"{action_idx}. **[CHECKPOINT]** More requested ({req}) than " - f"timed ({timed}) checkpoints — increase `max_wal_size`." + f"{action_idx}. **[CHECKPOINT]** More requested ({req:,}) " + f"than timed ({timed:,}) checkpoints — increase " + f"`max_wal_size`." ) if not has_issue: parts.append("No issues found.") parts.append("") - # --- Wait Events (Oracle) ------------------------------------------------- - if is_oracle: - rows = _get_rows(data, "wait_events") - parts.append("## Top Wait Events") - if not rows: - parts.append("No issues found.\n") - else: - parts.append("") - for row in rows[:10]: - event = row.get("event", "?") - waits = _safe_int(row.get("total_waits", 0)) - waited = _safe_float(row.get("time_waited_sec", 0)) - parts.append(f"- **`{event}`** — {waits:,} waits, {waited:.2f}s") - parts.append("") - # --- Temp File Usage (PostgreSQL) ----------------------------------------- if not is_oracle: rows = _get_rows(data, "temp_file_usage") if rows: parts.append("## Temp File Usage") parts.append("") - for row in rows[:5]: + for row in rows[:10]: sid = row.get("queryid", "?") temp_mb = _safe_float(row.get("temp_mb", 0)) + calls = _safe_int(row.get("calls", 0)) sql_text = str(row.get("query_text") or "") - parts.append(f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp usage") + parts.append( + f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp, {calls:,} calls" + ) if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 200)}\n```") + parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") if temp_mb > 100: action_idx += 1 
action_items.append( f"{action_idx}. **[TEMP FILES]** queryid `{sid}` uses " - f"{temp_mb:.1f} MB temp. Increase `work_mem` or optimize " - f"sort/join." + f"{temp_mb:.1f} MB temp. Increase `work_mem` or " + f"optimize sort/join." ) parts.append("") - # --- Executive Summary & Action Plan -------------------------------------- + # ===================================================================== + # EXECUTIVE SUMMARY & ACTION PLAN + # ===================================================================== summary_parts: list[str] = [] high_elapsed = _get_rows(data, "high_elapsed_per_exec") high_exec = _get_rows(data, "high_execution_count") @@ -1198,7 +1663,14 @@ def _build_findings_report(data: dict[str, Any]) -> str: seqs = _get_rows( data, "sequence_no_cache" if is_oracle else "sequence_cache_issues" ) + top_sql = ( + _get_rows(data, "top_cpu_sql" if is_oracle else "top_cpu_queries") + or _get_rows(data, "awr_top_sql") + or _get_rows(data, "pgprofile_top_sql") + ) + if top_sql: + summary_parts.append(f"{len(top_sql)} top SQL statements analysed") if high_elapsed: summary_parts.append( f"{len(high_elapsed)} queries with high elapsed time per execution" @@ -1209,7 +1681,8 @@ def _build_findings_report(data: dict[str, Any]) -> str: ) if fts: summary_parts.append( - f"{len(fts)} {'full table scans' if is_oracle else 'tables with heavy seq scans'}" + f"{len(fts)} " + f"{'full table scans' if is_oracle else 'tables with heavy seq scans'}" ) if contention: summary_parts.append(f"{len(contention)} contention/lock wait events") @@ -1219,7 +1692,7 @@ def _build_findings_report(data: dict[str, Any]) -> str: exec_summary = ( "Found: " + "; ".join(summary_parts) + "." if summary_parts - else "No significant performance issues detected." + else "No significant performance issues detected in the collected data." 
) # Build final report: summary at top, then sections, then action plan @@ -1228,7 +1701,10 @@ def _build_findings_report(data: dict[str, Any]) -> str: if action_items: footer.extend(action_items) else: - footer.append("No action items — database appears healthy.") + footer.append( + "No critical action items — database appears healthy based " + "on collected data." + ) return "\n".join(header + parts + footer) @@ -1317,32 +1793,15 @@ def check_pg_stat_statements(self) -> bool: # -- internal helpers ---------------------------------------------------- def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]: - # Programmatic analysis — Python code identifies all issues. + # Fully programmatic analysis — Python code identifies all issues. + # No LLM involved: codellama hallucinates generic advice. findings_report = _build_findings_report(raw_data) report_text = self._format_report(raw_data) - # Ask the LLM for a brief supplementary summary only. - llm_summary = "" - try: - llm_prompt = ( - findings_report + "\n\n---\n" - "Based on the findings above, write 3-5 sentences summarising " - "the most critical issues and what the DBA should do first. " - "Do NOT repeat the full report. Do NOT invent new findings." 
- ) - llm_summary = self.llm_client.generate(prompt=llm_prompt) - except (ConnectionError, RuntimeError) as exc: - llm_summary = f"(LLM summary unavailable: {exc})" - - # Combine: programmatic findings + optional LLM summary - analysis = findings_report - if llm_summary: - analysis += f"\n\n---\n## LLM Summary\n{llm_summary}" - return { "raw_data": raw_data, "report_text": report_text, - "analysis": analysis, + "analysis": findings_report, } def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]: diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py index 22eb9de..e517384 100644 --- a/tools/pg-assistant/snapshot_compare.py +++ b/tools/pg-assistant/snapshot_compare.py @@ -281,19 +281,11 @@ def _build_comparison( # Build delta summary table delta_table = self._build_delta_table(data_a, data_b, label_a, label_b) - # Programmatic comparison — Python code identifies all changes. + # Fully programmatic comparison — no LLM involved. findings = self._build_programmatic_comparison( data_a, data_b, label_a, label_b, delta_table ) - - # Optional LLM summary appended after the real findings. 
- comparison_text = self._format_comparison_text( - data_a, data_b, label_a, label_b, delta_table - ) - llm_summary = self._get_llm_comparison(comparison_text) analysis = findings - if llm_summary: - analysis += f"\n\n---\n## LLM Summary\n{llm_summary}" return { "figures": figures, From f74637a10326addddc55dc86556470311ad1ad30 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 12:12:41 +0000 Subject: [PATCH 18/19] Copilot-quality analysis: severity-grouped bottlenecks, risk register, config review, prioritised actions --- tools/pg-assistant/app.py | 16 +- tools/pg-assistant/auto_analyse.py | 2047 ++++++++++++++---------- tools/pg-assistant/snapshot_compare.py | 54 - 3 files changed, 1246 insertions(+), 871 deletions(-) diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py index de1d146..d134b0b 100644 --- a/tools/pg-assistant/app.py +++ b/tools/pg-assistant/app.py @@ -671,14 +671,14 @@ def _render_comparison(result: dict) -> None: with acol2: if st.button( - "🧠 Full Analysis (Data + LLM)", + "🧠 Full Analysis", use_container_width=True, type="primary", ): analyser = PerformanceAnalyser( db_client=db_client, llm_client=llm_client ) - with st.spinner("Collecting data and running LLM analysis..."): + with st.spinner("Collecting data and running analysis..."): result = analyser.analyse() st.session_state.analyser = analyser st.session_state["_last_analysis"] = result @@ -820,7 +820,7 @@ def _render_comparison(result: dict) -> None: ) else: with st.spinner( - "Collecting pg_stat_statements data and running LLM analysis..." + "Collecting pg_stat_statements data and running analysis..." 
): result = analyser.analyse_pg_stat_latest() st.session_state.analyser = analyser @@ -831,7 +831,7 @@ def _render_comparison(result: dict) -> None: elif analyse_mode == "Upload report file": st.markdown( "Upload an **AWR report** (HTML/text), **pg_stat_statements CSV**, " - "or **pgProfile report** (HTML/text) for LLM-powered analysis." + "or **pgProfile report** (HTML/text) for analysis." ) uploaded_file = st.file_uploader( "Choose a report file", @@ -1002,7 +1002,7 @@ def _render_comparison(result: dict) -> None: st.subheader("🔧 SQL Tuning Advisor") st.markdown( "Paste a SQL statement to get its **execution plan**, table metadata, " - "and **LLM-powered tuning recommendations** (index suggestions, " + "and **tuning recommendations** (index suggestions, " "SQL rewrites, stats maintenance)." ) @@ -1049,9 +1049,7 @@ def _render_comparison(result: dict) -> None: if tune_btn and tune_sql.strip(): advisor = SQLTuningAdvisor(db_client=db_client, llm_client=llm_client) - with st.spinner( - "Running EXPLAIN, collecting metadata, analysing with LLM..." 
- ): + with st.spinner("Running EXPLAIN, collecting metadata, analysing..."): result = advisor.analyse_sql(tune_sql.strip(), run_analyze=run_analyze) if result.get("error"): @@ -1063,7 +1061,7 @@ def _render_comparison(result: dict) -> None: st.subheader("Execution Plan") st.code(plan_text, language="text") - # Show LLM analysis + # Show analysis analysis = result.get("analysis", "") if analysis: st.divider() diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index d3f891f..004b351 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -796,6 +796,119 @@ CROSS JOIN pg_stat_bgwriter bg """ +_PG_TABLE_SIZES = """ + SELECT + schemaname, + relname, + pg_relation_size(relid) / 1048576 AS table_size_mb, + pg_total_relation_size(relid) / 1048576 AS total_size_mb, + (pg_total_relation_size(relid) - pg_relation_size(relid)) / 1048576 + AS toast_index_size_mb, + n_live_tup, + n_dead_tup, + n_tup_ins, n_tup_upd, n_tup_del + FROM pg_stat_user_tables + ORDER BY pg_total_relation_size(relid) DESC + LIMIT 20 +""" + +_PG_WAL_STATS = """ + SELECT + wal_records, + wal_fpi, + wal_bytes, + wal_buffers_full, + wal_write, + wal_sync, + ROUND(wal_write_time::numeric, 2) AS wal_write_time_ms, + ROUND(wal_sync_time::numeric, 2) AS wal_sync_time_ms, + stats_reset::text AS stats_reset + FROM pg_stat_wal +""" + +_PG_IDLE_IN_TRANSACTION = """ + SELECT + pid, + usename, + datname, + state, + LEFT(query, 300) AS query, + ROUND(EXTRACT(EPOCH FROM (now() - state_change))::numeric, 0) + AS idle_duration_sec, + ROUND(EXTRACT(EPOCH FROM (now() - xact_start))::numeric, 0) + AS xact_duration_sec + FROM pg_stat_activity + WHERE state = 'idle in transaction' + ORDER BY xact_start + LIMIT 20 +""" + +_PG_CONFIG_PARAMS = """ + SELECT name, setting, unit + FROM pg_settings + WHERE name IN ( + 'shared_buffers', 'effective_cache_size', 'work_mem', + 'maintenance_work_mem', 'max_connections', 'max_wal_size', + 'min_wal_size', 
'checkpoint_timeout', 'checkpoint_completion_target', + 'random_page_cost', 'effective_io_concurrency', + 'autovacuum_max_workers', 'autovacuum_vacuum_scale_factor', + 'autovacuum_analyze_scale_factor', 'statement_timeout', + 'idle_in_transaction_session_timeout', 'wal_level', + 'max_worker_processes', 'max_parallel_workers', + 'max_parallel_workers_per_gather', 'wal_compression', + 'huge_pages', 'shared_preload_libraries' + ) + ORDER BY name +""" + +_PG_REPLICATION_STATUS = """ + SELECT + client_addr::text, + state, + sent_lsn::text, + write_lsn::text, + flush_lsn::text, + replay_lsn::text, + ROUND(EXTRACT(EPOCH FROM write_lag)::numeric, 3) AS write_lag_sec, + ROUND(EXTRACT(EPOCH FROM flush_lag)::numeric, 3) AS flush_lag_sec, + ROUND(EXTRACT(EPOCH FROM replay_lag)::numeric, 3) AS replay_lag_sec + FROM pg_stat_replication +""" + +_ORA_CONFIG_PARAMS = """ + SELECT name, value, description + FROM v$parameter + WHERE name IN ( + 'sga_target', 'sga_max_size', 'pga_aggregate_target', + 'db_cache_size', 'shared_pool_size', 'log_buffer', + 'processes', 'sessions', 'open_cursors', + 'cursor_sharing', 'optimizer_mode', 'db_file_multiblock_read_count', + 'undo_retention', 'undo_tablespace', + 'result_cache_max_size', 'parallel_max_servers', + 'parallel_min_servers', 'job_queue_processes' + ) + ORDER BY name +""" + +_ORA_IDLE_SESSIONS = """ + SELECT * FROM ( + SELECT + sid, + serial#, + username, + status, + machine, + program, + ROUND(last_call_et / 60, 1) AS idle_minutes, + sql_id AS last_sql_id + FROM v$session + WHERE status = 'INACTIVE' + AND type = 'USER' + AND last_call_et > 300 + ORDER BY last_call_et DESC + ) WHERE ROWNUM <= 20 +""" + # --------------------------------------------------------------------------- # Programmatic analysis — Python code does the heavy lifting, not the LLM. 
# --------------------------------------------------------------------------- @@ -827,32 +940,60 @@ def _truncate_sql(sql_text: str, length: int = 200) -> str: return sql_text +def _fmt_bytes(b: float) -> str: + """Format bytes into human-readable size.""" + if b >= 1073741824: + return f"{b / 1073741824:.1f} GB" + if b >= 1048576: + return f"{b / 1048576:.1f} MB" + if b >= 1024: + return f"{b / 1024:.1f} KB" + return f"{b:.0f} B" + + +def _fmt_secs(s: float) -> str: + """Format seconds into human-readable duration.""" + if s >= 86400: + return f"{s / 86400:.1f} days" + if s >= 3600: + return f"{s / 3600:.1f} hrs" + if s >= 60: + return f"{s / 60:.1f} min" + return f"{s:.2f} sec" + + def _build_findings_report(data: dict[str, Any]) -> str: - """Analyse collected data programmatically and build a markdown report. + """Production-grade performance analysis report. - This function does the actual analysis in Python code — identifying - problematic SQL, full table scans, missing indexes, etc. from the - real data. No LLM is involved in finding issues. + Analyses collected data programmatically — identifies bottlenecks, + groups by severity, references specific SQL IDs / table names / metrics. + No LLM is involved. Output format inspired by enterprise DBA assessments. 
""" db_type = data.get("db_type", "unknown") is_oracle = db_type == DB_TYPE_ORACLE - parts: list[str] = [] - action_items: list[str] = [] - action_idx = 0 - - parts.append(f"# Performance Analysis Report ({db_type.upper()})") - parts.append("") + # Accumulate bottlenecks as (severity, title, details_markdown) + bottlenecks: list[tuple[int, str, str]] = [] + # Accumulate prioritised actions as (priority, action_text) + actions: list[tuple[int, str]] = [] + # Accumulate risks as (likelihood, impact, description) + risks: list[tuple[str, str, str]] = [] + act_idx = 0 # ===================================================================== - # DATABASE-LEVEL OVERVIEW + # PHASE 1 — Extract key metrics # ===================================================================== - # --- Database Stats (PostgreSQL) ------------------------------------------ + # --- PostgreSQL database stats --- + cache_hit = 100.0 + commits = rollbacks = 0 + backends = 0 + temp_bytes = temp_files = 0 + blks_read = blks_hit = 0 if not is_oracle: db_rows = _get_rows(data, "database_stats") if db_rows: row = db_rows[0] - cache_hit = _safe_float(row.get("cache_hit_pct", 0)) + cache_hit = _safe_float(row.get("cache_hit_pct", 100)) commits = _safe_int(row.get("xact_commit", 0)) rollbacks = _safe_int(row.get("xact_rollback", 0)) backends = _safe_int(row.get("numbackends", 0)) @@ -860,853 +1001,1138 @@ def _build_findings_report(data: dict[str, Any]) -> str: blks_hit = _safe_int(row.get("blks_hit", 0)) temp_bytes = _safe_int(row.get("temp_bytes", 0)) temp_files = _safe_int(row.get("temp_files", 0)) - parts.append("## Database Overview") - parts.append( - f"- **Cache hit ratio:** {cache_hit:.2f}%\n" - f"- **Active backends:** {backends}\n" - f"- **Transactions:** {commits:,} commits, {rollbacks:,} rollbacks\n" - f"- **Blocks:** {blks_hit:,} hit, {blks_read:,} read from disk\n" - f"- **Temp usage:** {temp_files:,} files, " - f"{temp_bytes / 1048576:.1f} MB" - ) - if cache_hit < 95 and blks_read > 0: - 
action_idx += 1 - action_items.append( - f"{action_idx}. **[CACHE]** Cache hit ratio is {cache_hit:.2f}% " - f"(target > 99%). Increase `shared_buffers`." - ) - if rollbacks > 0 and commits > 0: - rb_pct = rollbacks / (commits + rollbacks) * 100 - if rb_pct > 5: - action_idx += 1 - action_items.append( - f"{action_idx}. **[ROLLBACKS]** {rb_pct:.1f}% rollback rate " - f"({rollbacks:,}/{commits + rollbacks:,}). " - f"Investigate application error handling." - ) - parts.append("") - # --- Connection Stats (PostgreSQL) ---------------------------------------- - if not is_oracle: - conn_rows = _get_rows(data, "connection_stats") - if conn_rows: - parts.append("## Connection Stats") - for row in conn_rows: - state = row.get("state", "unknown") or "null" - count = _safe_int(row.get("count", 0)) - wtype = row.get("wait_event_type", "None") - parts.append(f"- **{state}**: {count} connections (wait: {wtype})") - idle_count = sum( - _safe_int(r.get("count", 0)) - for r in conn_rows - if (r.get("state") or "").startswith("idle") - ) - if idle_count > 50: - action_idx += 1 - action_items.append( - f"{action_idx}. **[CONNECTIONS]** {idle_count} idle connections. " - f"Use connection pooling (PgBouncer)." 
- ) - parts.append("") - - # --- Oracle System Stats -------------------------------------------------- + # --- Oracle system stats --- + ora_cache_hit = 100.0 + ora_hard_parse_pct = 0.0 + ora_disk_sort_pct = 0.0 + ora_rb_count = 0 + ora_commit_count = 0 if is_oracle: sys_rows = _get_rows(data, "system_stats") or _get_rows( data, "awr_system_stats" ) - if sys_rows: - parts.append("## System Statistics") - stats_map: dict[str, int] = {} - for row in sys_rows: - name = str(row.get("name", "")) - val = _safe_int(row.get("value", 0)) - stats_map[name] = val - parts.append(f"- **{name}:** {val:,}") - # Cache hit ratio - db_gets = stats_map.get("db block gets", 0) - consistent = stats_map.get("consistent gets", 0) - phys_reads = stats_map.get("physical reads", 0) - logical = db_gets + consistent - if logical > 0: - hit_pct = (1 - phys_reads / logical) * 100 - parts.append(f"\n**Buffer cache hit ratio: {hit_pct:.2f}%**") - if hit_pct < 95: - action_idx += 1 - action_items.append( - f"{action_idx}. **[CACHE]** Buffer cache hit ratio is " - f"{hit_pct:.2f}% (target > 99%). " - f"Increase `db_cache_size`." - ) - hard_parse = stats_map.get("parse count (hard)", 0) - total_parse = stats_map.get("parse count (total)", 0) - if total_parse > 0: - hard_pct = hard_parse / total_parse * 100 - if hard_pct > 30: - action_idx += 1 - action_items.append( - f"{action_idx}. **[PARSING]** Hard parse ratio is " - f"{hard_pct:.1f}%. Use bind variables." - ) - sorts_disk = stats_map.get("sorts (disk)", 0) - sorts_mem = stats_map.get("sorts (memory)", 0) - if sorts_disk > 0 and sorts_mem > 0: - disk_pct = sorts_disk / (sorts_mem + sorts_disk) * 100 - if disk_pct > 5: - action_idx += 1 - action_items.append( - f"{action_idx}. **[SORTS]** {disk_pct:.1f}% sorts on disk " - f"({sorts_disk:,}/{sorts_mem + sorts_disk:,}). " - f"Increase `sort_area_size` / `PGA_AGGREGATE_TARGET`." 
- ) - parts.append("") + stats_map: dict[str, int] = {} + for row in sys_rows: + stats_map[str(row.get("name", ""))] = _safe_int(row.get("value", 0)) + db_gets = stats_map.get("db block gets", 0) + consistent = stats_map.get("consistent gets", 0) + phys_reads = stats_map.get("physical reads", 0) + logical = db_gets + consistent + if logical > 0: + ora_cache_hit = (1 - phys_reads / logical) * 100 + hard_parse = stats_map.get("parse count (hard)", 0) + total_parse = stats_map.get("parse count (total)", 0) + if total_parse > 0: + ora_hard_parse_pct = hard_parse / total_parse * 100 + sorts_disk = stats_map.get("sorts (disk)", 0) + sorts_mem = stats_map.get("sorts (memory)", 0) + if sorts_mem + sorts_disk > 0: + ora_disk_sort_pct = sorts_disk / (sorts_mem + sorts_disk) * 100 + ora_rb_count = stats_map.get("user rollbacks", 0) + ora_commit_count = stats_map.get("user commits", 0) + + # --- WAL stats (PostgreSQL 14+) --- + wal_bytes = 0 + wal_fpi = 0 + wal_sync_time_ms = 0.0 + wal_write_time_ms = 0.0 + if not is_oracle: + wal_rows = _get_rows(data, "wal_stats") + if wal_rows: + w = wal_rows[0] + wal_bytes = _safe_int(w.get("wal_bytes", 0)) + wal_fpi = _safe_int(w.get("wal_fpi", 0)) + wal_sync_time_ms = _safe_float(w.get("wal_sync_time_ms", 0)) + wal_write_time_ms = _safe_float(w.get("wal_write_time_ms", 0)) + + # --- Connection counts --- + idle_in_tx_rows = _get_rows(data, "idle_in_transaction") if not is_oracle else [] + idle_session_rows = _get_rows(data, "idle_sessions") if is_oracle else [] + conn_rows = _get_rows(data, "connection_stats") if not is_oracle else [] + idle_count = sum( + _safe_int(r.get("count", 0)) + for r in conn_rows + if (r.get("state") or "").startswith("idle") + ) - # --- Oracle SGA Info ------------------------------------------------------ - if is_oracle: - sga_rows = _get_rows(data, "sga_info") - if sga_rows: - parts.append("## SGA Configuration") - for row in sga_rows: - name = row.get("name", "?") - size_mb = _safe_float(row.get("size_mb", 
0)) - parts.append(f"- **{name}:** {size_mb:.0f} MB") - parts.append("") + # --- Top SQL --- + top_cpu = ( + _get_rows(data, "top_cpu_sql") + if is_oracle + else _get_rows(data, "top_cpu_queries") + ) + top_elapsed = ( + _get_rows(data, "top_elapsed_sql") + if is_oracle + else _get_rows(data, "top_queries") + ) + # Fallback to AWR / pgProfile + if not top_cpu and not top_elapsed: + top_elapsed = _get_rows(data, "awr_top_sql") or _get_rows( + data, "pgprofile_top_sql" + ) + high_elapsed = _get_rows(data, "high_elapsed_per_exec") + high_exec = _get_rows(data, "high_execution_count") + fts = ( + _get_rows(data, "full_table_scans") + if is_oracle + else _get_rows(data, "seq_scan_tables") + ) - # --- Oracle Tablespace I/O ------------------------------------------------ - if is_oracle: - ts_rows = _get_rows(data, "tablespace_io") - if ts_rows: - parts.append("## Tablespace I/O") - for row in ts_rows: - ts_name = row.get("tablespace_name", "?") - reads = _safe_int(row.get("physical_reads", 0)) - writes = _safe_int(row.get("physical_writes", 0)) - read_sec = _safe_float(row.get("read_time_sec", 0)) - write_sec = _safe_float(row.get("write_time_sec", 0)) - parts.append( - f"- **`{ts_name}`** — reads: {reads:,} ({read_sec:.2f}s), " - f"writes: {writes:,} ({write_sec:.2f}s)" - ) - if read_sec > 10: - action_idx += 1 - action_items.append( - f"{action_idx}. **[I/O]** Tablespace `{ts_name}` has " - f"{read_sec:.2f}s read time. Move to faster storage or " - f"redistribute I/O." 
- ) - parts.append("") + # --- Tables --- + table_sizes = _get_rows(data, "table_sizes") if not is_oracle else [] + bloat_rows = _get_rows(data, "bloat_estimate") if not is_oracle else [] + unused_idx = _get_rows(data, "unused_indexes") + stale_rows = ( + _get_rows(data, "stale_stats_vacuum") + if not is_oracle + else _get_rows(data, "stale_statistics") + ) - # --- Oracle Redo Log Switches --------------------------------------------- - if is_oracle: - redo_rows = _get_rows(data, "redo_log_switches") - if redo_rows: - parts.append("## Redo Log Switches (Last 24h)") - for row in redo_rows: - hour = row.get("switch_hour", "?") - switches = _safe_int(row.get("switches", 0)) - parts.append(f"- **{hour}:** {switches} switches") - if switches > 10: - action_idx += 1 - action_items.append( - f"{action_idx}. **[REDO]** {switches} log switches in hour " - f"{hour}. Increase redo log file size." - ) - parts.append("") + # --- Contention --- + contention = ( + _get_rows(data, "row_contention") + if is_oracle + else _get_rows(data, "lock_waits") + ) + wait_rows = ( + _get_rows(data, "wait_events") + or _get_rows(data, "awr_wait_events") + or _get_rows(data, "pgprofile_wait_events") + ) - # --- Oracle Temp Usage ---------------------------------------------------- - if is_oracle: - temp_rows = _get_rows(data, "temp_usage") - if temp_rows: - parts.append("## Temp Tablespace Usage") - for row in temp_rows: - ts_name = row.get("tablespace_name", "?") - used_mb = _safe_float(row.get("used_mb", 0)) - free_mb = _safe_float(row.get("free_mb", 0)) - pct = _safe_float(row.get("pct_used", 0)) - parts.append( - f"- **`{ts_name}`** — {used_mb:.0f} MB used, " - f"{free_mb:.0f} MB free ({pct:.1f}% used)" - ) - if pct > 80: - action_idx += 1 - action_items.append( - f"{action_idx}. **[TEMP]** `{ts_name}` is {pct:.1f}% full. " - f"Add temp datafile or resize." 
- ) - parts.append("") + # --- Sequences --- + seqs = ( + _get_rows(data, "sequence_no_cache") + if is_oracle + else _get_rows(data, "sequence_cache_issues") + ) + + # --- Config --- + config_rows = _get_rows(data, "config_params") + + # --- Checkpoint (PG) --- + ckpt_rows = _get_rows(data, "checkpoint_stats") if not is_oracle else [] + + # --- Replication (PG) --- + repl_rows = _get_rows(data, "replication_status") if not is_oracle else [] + + # --- Temp file usage (PG) --- + temp_sql_rows = _get_rows(data, "temp_file_usage") if not is_oracle else [] # ===================================================================== - # TOP SQL BY RESOURCE CONSUMPTION + # PHASE 2 -- Identify bottlenecks with severity # ===================================================================== + # Severity 1 = critical, 2 = important, 3 = advisory + + # -- Rollback explosion -- + rb_rate = 0.0 + if not is_oracle and commits + rollbacks > 0: + rb_rate = rollbacks / (commits + rollbacks) * 100 + elif is_oracle and ora_commit_count + ora_rb_count > 0: + rb_rate = ora_rb_count / (ora_commit_count + ora_rb_count) * 100 + if rb_rate > 10: + detail = ( + f"**{rollbacks:,} rollbacks** vs {commits:,} commits " + f"(**{rb_rate:.1f}% rollback rate**)\n\n" + if not is_oracle + else f"**{ora_rb_count:,} rollbacks** vs {ora_commit_count:,} commits " + f"(**{rb_rate:.1f}% rollback rate**)\n\n" + ) + detail += ( + "This almost always means:\n" + "- Business validation aborts\n" + "- Exception-based flow control\n" + "- Retry loops without guardrails\n\n" + "Directly increases WAL, dead tuples, autovacuum load." + ) + bottlenecks.append((1, "Rollback Explosion", detail)) + act_idx += 1 + actions.append( + ( + 0, + f"{act_idx}. 
**Root-cause rollbacks** -- identify " + f"why {rb_rate:.1f}% of transactions are aborted", + ) + ) + risks.append(("High", "Severe", "Dead tuple accumulation from rollbacks")) + elif rb_rate > 5: + total_rb = rollbacks if not is_oracle else ora_rb_count + bottlenecks.append( + ( + 2, + "Elevated Rollback Rate", + f"Rollback rate is **{rb_rate:.1f}%** " + f"({total_rb:,} rollbacks). Investigate application " + f"error handling.", + ) + ) + act_idx += 1 + actions.append((1, f"{act_idx}. Investigate rollback sources")) - # --- Top CPU SQL (always show — this is the most important section) ------- - cpu_key = "top_cpu_sql" if is_oracle else "top_cpu_queries" - cpu_rows = _get_rows(data, cpu_key) - # Also check AWR top SQL / pgProfile top SQL as alternatives - if not cpu_rows: - cpu_rows = _get_rows(data, "awr_top_sql") - if not cpu_rows: - cpu_rows = _get_rows(data, "pgprofile_top_sql") - if cpu_rows: - parts.append("## Top SQL by CPU / Elapsed Time") - parts.append("") - for i, row in enumerate(cpu_rows[:15]): - sid = row.get("sql_id") or row.get("queryid") or "?" - id_label = "sql_id" if is_oracle else "queryid" - if is_oracle: - cpu_sec = _safe_float(row.get("cpu_sec", 0)) - elapsed_sec = _safe_float(row.get("elapsed_sec", 0)) - execs = _safe_int(row.get("executions", 0)) - gets = _safe_int(row.get("buffer_gets", 0)) - gets_per = _safe_int(row.get("gets_per_exec", 0)) - sql_text = str(row.get("sql_text") or "") - parts.append( - f"**{i + 1}. 
{id_label}: `{sid}`** — " - f"CPU: {cpu_sec:.2f}s, elapsed: {elapsed_sec:.2f}s, " - f"{execs:,} executions, buffer gets: {gets:,} " - f"({gets_per:,}/exec)" - ) - else: - total_sec = _safe_float( - row.get("total_exec_sec") or row.get("total_exec_time", 0) - ) - mean_sec = _safe_float( - row.get("mean_exec_sec") or row.get("mean_exec_time", 0) - ) - calls = _safe_int(row.get("calls", 0)) - cache_hit = _safe_float(row.get("cache_hit_pct", 100)) - blk_read = _safe_int(row.get("shared_blks_read", 0)) - blk_hit = _safe_int(row.get("shared_blks_hit", 0)) - parts.append( - f"**{i + 1}. {id_label}: `{sid}`** — " - f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, " - f"{calls:,} calls, cache hit: {cache_hit:.1f}%, " - f"blks read: {blk_read:,}, blks hit: {blk_hit:,}" - ) - sql_text = str(row.get("sql_text") or row.get("query_text") or "") - if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") - # Generate action items for top offenders - if i < 5: - if is_oracle: - if gets_per > 10000: - action_idx += 1 - action_items.append( - f"{action_idx}. **[TOP CPU]** `{sid}` — " - f"{gets_per:,} buffer gets/exec. " - f"Review execution plan: " - f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR" - f"('{sid}'));`" - ) - else: - if cache_hit < 90 and blk_read > 1000: - action_idx += 1 - action_items.append( - f"{action_idx}. **[LOW CACHE HIT]** queryid `{sid}` — " - f"cache hit {cache_hit:.1f}%, {blk_read:,} blocks read. " - f"Add indexes or increase `shared_buffers`." - ) - if mean_sec > 1.0: - action_idx += 1 - action_items.append( - f"{action_idx}. **[SLOW QUERY]** queryid `{sid}` — " - f"avg {mean_sec:.4f}s/call. Run " - f"`EXPLAIN (ANALYZE, BUFFERS) ` to investigate." 
- ) - parts.append("") + # -- Idle-in-transaction (PG) -- + if idle_in_tx_rows: + total_idle_sec = sum( + _safe_float(r.get("xact_duration_sec", 0)) for r in idle_in_tx_rows + ) + longest = max( + _safe_float(r.get("xact_duration_sec", 0)) for r in idle_in_tx_rows + ) + detail = ( + f"**{len(idle_in_tx_rows)} sessions** idle in transaction, " + f"cumulative **{_fmt_secs(total_idle_sec)}**, " + f"longest **{_fmt_secs(longest)}**\n\n" + "| PID | User | Duration | Query |\n" + "| --- | --- | --- | --- |\n" + ) + for r in idle_in_tx_rows[:10]: + pid = r.get("pid", "?") + user = r.get("usename", "?") + dur = _fmt_secs(_safe_float(r.get("xact_duration_sec", 0))) + q = _truncate_sql(str(r.get("query", "")), 80) + detail += f"| {pid} | {user} | {dur} | `{q}` |\n" + detail += ( + "\nImpact: prevents vacuum, creates dead tuples, " + "increases lock contention.\n" + "This is an **application defect**, not a DB tuning issue." + ) + sev = 1 if total_idle_sec > 3600 else 2 + bottlenecks.append((sev, "Idle-in-Transaction Sessions", detail)) + act_idx += 1 + actions.append( + ( + 0, + f"{act_idx}. **Fix idle-in-transaction at app layer** -- " + f"enforce connection/transaction guards, " + f"set `idle_in_transaction_session_timeout`", + ) + ) + risks.append(("High", "Severe", "Bloat & lock risk from idle-in-tx")) + + # -- Idle Oracle sessions -- + if idle_session_rows and len(idle_session_rows) > 5: + detail = ( + f"**{len(idle_session_rows)} sessions** idle > 5 minutes\n\n" + "| SID | User | Idle (min) | Program |\n" + "| --- | --- | --- | --- |\n" + ) + for r in idle_session_rows[:10]: + detail += ( + f"| {r.get('sid', '?')} | {r.get('username', '?')} " + f"| {_safe_float(r.get('idle_minutes', 0)):.0f} " + f"| {r.get('program', '?')} |\n" + ) + bottlenecks.append((2, "Excessive Idle Sessions", detail)) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. 
Review idle sessions -- consider " + f"connection pooling or session timeout", + ) + ) - # --- Top Queries by Total Elapsed (fallback if different from CPU) -------- - elapsed_key = "top_elapsed_sql" if is_oracle else "top_queries" - elapsed_rows = _get_rows(data, elapsed_key) - # Only show if we have data AND it's different from cpu_rows - if elapsed_rows and elapsed_key != cpu_key: - # Check if these are substantially different from the CPU rows - cpu_ids = ( - {str(r.get("sql_id") or r.get("queryid") or "") for r in cpu_rows[:10]} - if cpu_rows - else set() + # -- Cache hit ratio -- + eff_cache_hit = cache_hit if not is_oracle else ora_cache_hit + if eff_cache_hit < 95: + detail = f"Buffer cache hit ratio: **{eff_cache_hit:.2f}%** (target > 99%)\n\n" + if not is_oracle: + detail += ( + f"Blocks hit: {blks_hit:,}, blocks read from disk: {blks_read:,}\n\n" + f"**Fix:** Increase `shared_buffers` " + f"(current value shown in Configuration Review below)." + ) + else: + detail += "**Fix:** Increase `db_cache_size`." + sev = 1 if eff_cache_hit < 90 else 2 + bottlenecks.append((sev, "Low Buffer Cache Hit Ratio", detail)) + act_idx += 1 + param = "shared_buffers" if not is_oracle else "db_cache_size" + actions.append( + ( + 0, + f"{act_idx}. **Increase `{param}`** -- cache hit is {eff_cache_hit:.2f}%", + ) + ) + risks.append(("High", "High", "Excessive disk I/O from cache misses")) + + # -- WAL pressure (PG) -- + if wal_bytes > 0: + wal_gb = wal_bytes / 1073741824 + detail = ( + f"**{wal_gb:.1f} GB WAL** generated (since stats reset)\n" + f"- Full-page images (FPI): {wal_fpi:,}\n" + f"- WAL sync time: {wal_sync_time_ms / 1000:.1f} sec\n" + f"- WAL write time: {wal_write_time_ms / 1000:.1f} sec\n" + ) + if wal_sync_time_ms > wal_write_time_ms * 5 and wal_sync_time_ms > 1000: + detail += ( + "\nWAL sync time is **much higher** than write time " + "-- disk sync latency issue." 
+ ) + bottlenecks.append((1, "WAL & Write Pressure", detail)) + risks.append(("Medium-High", "Severe", "WAL disk saturation")) + elif wal_gb > 10: + bottlenecks.append((2, "High WAL Volume", detail)) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. Review WAL generation -- " + f"batch commits, consider `wal_compression`", + ) ) - new_rows = [ - r - for r in elapsed_rows - if str(r.get("sql_id") or r.get("queryid") or "") not in cpu_ids - ] - if new_rows: - parts.append("## Additional Top SQL by Total Elapsed Time") - parts.append("*(Not already listed in Top CPU section)*\n") - for i, row in enumerate(new_rows[:10]): - sid = row.get("sql_id") or row.get("queryid") or "?" - id_label = "sql_id" if is_oracle else "queryid" - if is_oracle: - elapsed_sec = _safe_float(row.get("elapsed_sec", 0)) - execs = _safe_int(row.get("executions", 0)) - gets = _safe_int(row.get("buffer_gets", 0)) - sql_text = str(row.get("sql_text") or "") - parts.append( - f"**{i + 1}. {id_label}: `{sid}`** — " - f"elapsed: {elapsed_sec:.2f}s, {execs:,} execs, " - f"buffer gets: {gets:,}" - ) - else: - total_sec = _safe_float(row.get("total_exec_sec", 0)) - mean_sec = _safe_float(row.get("mean_exec_sec", 0)) - calls = _safe_int(row.get("calls", 0)) - cache_hit = _safe_float(row.get("cache_hit_pct", 100)) - parts.append( - f"**{i + 1}. 
{id_label}: `{sid}`** — " - f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, " - f"{calls:,} calls, cache hit: {cache_hit:.1f}%" - ) - sql_text = str(row.get("sql_text") or row.get("query_text") or "") - if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") - parts.append("") - # ===================================================================== - # THRESHOLD-BASED FINDINGS - # ===================================================================== + # -- Hard parse ratio (Oracle) -- + if is_oracle and ora_hard_parse_pct > 30: + bottlenecks.append( + ( + 2, + "High Hard Parse Ratio", + f"Hard parse ratio: **{ora_hard_parse_pct:.1f}%**\n\n" + f"**Fix:** Use bind variables instead of literal values.", + ) + ) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. Use bind variables -- " + f"hard parse ratio is {ora_hard_parse_pct:.1f}%", + ) + ) - # --- High Elapsed Time SQL ------------------------------------------------ - section_key = "high_elapsed_per_exec" - rows = _get_rows(data, section_key) - parts.append("## High Elapsed Time per Execution (> 1s avg)") - if not rows: - parts.append("No queries exceed the 1s/exec threshold.\n") - else: - parts.append("") - for row in rows: - sid = row.get("sql_id") or row.get("queryid") or "?" 
- avg_elapsed = _safe_float(row.get("avg_elapsed_sec", 0)) - total_elapsed = _safe_float( - row.get("total_elapsed_sec") or row.get("total_exec_sec", 0) + # -- Disk sorts (Oracle) -- + if is_oracle and ora_disk_sort_pct > 5: + bottlenecks.append( + ( + 2, + "Disk Sorts", + f"**{ora_disk_sort_pct:.1f}%** of sorts go to disk.\n\n" + f"**Fix:** Increase `PGA_AGGREGATE_TARGET` or `SORT_AREA_SIZE`.", ) - execs = _safe_int(row.get("executions") or row.get("calls", 0)) - sql_text = str(row.get("sql_text") or row.get("query_text") or "") - gets = _safe_int(row.get("buffer_gets") or row.get("shared_blks_read", 0)) - id_label = "sql_id" if is_oracle else "queryid" - parts.append( - f"**{id_label}: `{sid}`** — " - f"avg {avg_elapsed:.4f}s/exec, {execs:,} executions, " - f"total {total_elapsed:.2f}s, buffer gets/reads: {gets:,}" + ) + act_idx += 1 + actions.append( + (1, f"{act_idx}. Increase PGA -- {ora_disk_sort_pct:.1f}% disk sorts") + ) + + # -- Top SQL bottlenecks -- + top_sql_all = top_cpu or top_elapsed + if top_sql_all: + total_elapsed = sum( + _safe_float( + r.get("elapsed_sec", 0) + or r.get("total_exec_sec", 0) + or r.get("cpu_sec", 0) ) - if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") - action_idx += 1 + for r in top_sql_all + ) + top1 = top_sql_all[0] + if is_oracle: + t1_id = top1.get("sql_id", "?") + t1_elapsed = _safe_float( + top1.get("elapsed_sec", 0) or top1.get("cpu_sec", 0) + ) + t1_execs = _safe_int(top1.get("executions", 0)) + t1_gets = _safe_int(top1.get("buffer_gets", 0)) + t1_sql = str(top1.get("sql_text", "")) + else: + t1_id = str(top1.get("queryid", "?")) + t1_elapsed = _safe_float(top1.get("total_exec_sec", 0)) + t1_execs = _safe_int(top1.get("calls", 0)) + t1_gets = _safe_int(top1.get("shared_blks_hit", 0)) + _safe_int( + top1.get("shared_blks_read", 0) + ) + t1_sql = str(top1.get("query_text", "")) + + detail = ( + f"Top query alone: **{_fmt_secs(t1_elapsed)}** elapsed, " + f"**{t1_execs:,}** executions, 
**{t1_gets:,}** buffer gets\n\n" + ) + id_col = "sql_id" if is_oracle else "queryid" + detail += ( + f"| # | {id_col} | Elapsed | Executions | Buffer Gets | Query |\n" + f"| --- | --- | --- | --- | --- | --- |\n" + ) + for i, r in enumerate(top_sql_all[:10]): if is_oracle: - action_items.append( - f"{action_idx}. **[HIGH ELAPSED]** `{sid}` " - f"(avg {avg_elapsed:.4f}s/exec). Check plan: " - f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR" - f"('{sid}'));` — add indexes on WHERE/JOIN columns." - ) + sid = r.get("sql_id", "?") + elapsed = _safe_float(r.get("elapsed_sec", 0) or r.get("cpu_sec", 0)) + execs = _safe_int(r.get("executions", 0)) + gets = _safe_int(r.get("buffer_gets", 0)) + sql = _truncate_sql(str(r.get("sql_text", "")), 60) else: - action_items.append( - f"{action_idx}. **[HIGH ELAPSED]** queryid `{sid}` " - f"(avg {avg_elapsed:.4f}s/exec). Run " - f"`EXPLAIN (ANALYZE, BUFFERS)` on this query and " - f"add indexes on filtered columns." + sid = str(r.get("queryid", "?")) + elapsed = _safe_float(r.get("total_exec_sec", 0)) + execs = _safe_int(r.get("calls", 0)) + gets = _safe_int(r.get("shared_blks_hit", 0)) + _safe_int( + r.get("shared_blks_read", 0) ) - parts.append("") - - # --- High Execution Count SQL --------------------------------------------- - section_key = "high_execution_count" - rows = _get_rows(data, section_key) - parts.append("## High Execution Count SQL (> 1000 calls)") - if not rows: - parts.append("No queries exceed the 1000 execution threshold.\n") - else: - parts.append("") - for row in rows: - sid = row.get("sql_id") or row.get("queryid") or "?" 
- execs = _safe_int(row.get("executions") or row.get("calls", 0)) - total_elapsed = _safe_float( - row.get("total_elapsed_sec") or row.get("total_exec_sec", 0) + sql = _truncate_sql(str(r.get("query_text", "")), 60) + detail += ( + f"| {i + 1} | `{sid}` | {_fmt_secs(elapsed)} " + f"| {execs:,} | {gets:,} | `{sql}` |\n" ) - sql_text = str(row.get("sql_text") or row.get("query_text") or "") - id_label = "sql_id" if is_oracle else "queryid" - parts.append( - f"**{id_label}: `{sid}`** — " - f"{execs:,} executions, total {total_elapsed:.2f}s" + + if t1_sql: + detail += ( + f"\n**Top #1 full query text:**\n" + f"```sql\n{_truncate_sql(t1_sql, 500)}\n```\n" ) - if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") - if execs > 100000: - action_idx += 1 - action_items.append( - f"{action_idx}. **[HIGH EXEC COUNT]** `{sid}` executed " - f"{execs:,} times. Consider caching results, batching, " - f"or reducing call frequency." + + if len(top_sql_all) >= 3: + top3_elapsed = sum( + _safe_float( + r.get("elapsed_sec", 0) + or r.get("total_exec_sec", 0) + or r.get("cpu_sec", 0) ) - parts.append("") + for r in top_sql_all[:3] + ) + if total_elapsed > 0 and top3_elapsed / total_elapsed > 0.7: + pct = top3_elapsed / total_elapsed * 100 + detail += f"\n**Top 3 queries = ~{pct:.0f}% of total execution time.**" + + bottlenecks.append((1, "Query-Level Offenders (Top SQL)", detail)) + act_idx += 1 + actions.append( + ( + 0, + f"{act_idx}. 
**Review top SQL** -- " + f"{id_col} `{t1_id}` accounts for " + f"{_fmt_secs(t1_elapsed)} elapsed", + ) + ) - # --- Full Table Scans ----------------------------------------------------- - fts_key = "full_table_scans" if is_oracle else "seq_scan_tables" - rows = _get_rows(data, fts_key) - parts.append("## Full Table Scans / Sequential Scans") - if not rows: - parts.append("No issues found.\n") - else: - parts.append("") - for row in rows: + # -- High elapsed per execution -- + if high_elapsed: + detail = "Queries taking > 1 sec per execution:\n\n" + id_col = "sql_id" if is_oracle else "queryid" + detail += ( + f"| {id_col} | Avg Elapsed | Total Elapsed | Execs | Query |\n" + f"| --- | --- | --- | --- | --- |\n" + ) + for r in high_elapsed[:10]: if is_oracle: - table = row.get("table_name", "?") - owner = row.get("object_owner", "") - sid = row.get("sql_id", "?") - execs = _safe_int(row.get("executions", 0)) - elapsed = _safe_float(row.get("elapsed_sec", 0)) - gets = _safe_int(row.get("buffer_gets", 0)) - sql_text = str(row.get("sql_text") or "") - parts.append( - f"**Table: `{owner}.{table}`** — sql_id: `{sid}`, " - f"{execs:,} execs, {elapsed:.2f}s elapsed, " - f"{gets:,} buffer gets" - ) - if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```") - action_idx += 1 - action_items.append( - f"{action_idx}. **[FULL TABLE SCAN]** `{owner}.{table}` " - f"via sql_id `{sid}`. Add index on columns in WHERE clause " - f"or use hints to force index access." 
- ) + sid = r.get("sql_id", "?") + avg_e = _safe_float(r.get("avg_elapsed_sec", 0)) + tot_e = _safe_float(r.get("total_elapsed_sec", 0)) + execs = _safe_int(r.get("executions", 0)) + sql = _truncate_sql(str(r.get("sql_text", "")), 60) else: - table = row.get("relname", "?") - schema = row.get("schemaname", "public") - seq_scans = _safe_int(row.get("seq_scan", 0)) - seq_tup_read = _safe_int(row.get("seq_tup_read", 0)) - idx_scans = _safe_int(row.get("idx_scan", 0)) - live_tup = _safe_int(row.get("n_live_tup", 0)) - size_mb = _safe_float(row.get("table_size_mb", 0)) - avg_rows = _safe_int(row.get("avg_rows_per_seq_scan", 0)) - parts.append( - f"**Table: `{schema}.{table}`** — " - f"{seq_scans:,} seq scans ({avg_rows:,} rows/scan avg, " - f"{seq_tup_read:,} rows read), " - f"{idx_scans:,} idx scans, {live_tup:,} live rows, " - f"{size_mb:.1f} MB" - ) - if seq_scans > 100 and live_tup > 10000: - action_idx += 1 - action_items.append( - f"{action_idx}. **[SEQ SCAN]** `{schema}.{table}` has " - f"{seq_scans:,} seq scans on {live_tup:,} rows " - f"({size_mb:.1f} MB). Add indexes on frequently " - f"filtered columns: " - f"`CREATE INDEX ON {schema}.{table} (column_name);`" - ) - parts.append("") + sid = str(r.get("queryid", "?")) + avg_e = _safe_float(r.get("avg_elapsed_sec", 0)) + tot_e = _safe_float(r.get("total_exec_sec", 0)) + execs = _safe_int(r.get("calls", 0)) + sql = _truncate_sql(str(r.get("query_text", "")), 60) + detail += ( + f"| `{sid}` | {_fmt_secs(avg_e)} | {_fmt_secs(tot_e)} " + f"| {execs:,} | `{sql}` |\n" + ) + bottlenecks.append((2, "High Elapsed Time per Execution", detail)) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. 
Tune slow queries -- " + f"{len(high_elapsed)} queries > 1 sec/exec", + ) + ) - # --- Execution Plans (Oracle) --------------------------------------------- - if is_oracle: - plans = data.get("execution_plans", []) - if isinstance(plans, list) and plans: - parts.append("## Execution Plans (Top SQL)") - parts.append("") - for plan in plans[:5]: - sid = plan.get("sql_id", "?") - steps = plan.get("steps", []) - parts.append(f"### Plan for sql_id: `{sid}`") - has_full_scan = False - has_hash_join = False - for step in steps[:20]: - op = str(step.get("operation", "")) - obj = step.get("object_name", "") - cost = step.get("cost", "") - est = step.get("est_rows", "") - line = f"- {op}" - if obj: - line += f" on `{obj}`" - if cost: - line += f" (cost={cost}, rows={est})" - parts.append(line) - if "FULL" in op.upper(): - has_full_scan = True - if "HASH JOIN" in op.upper(): - has_hash_join = True - if has_full_scan: - action_idx += 1 - action_items.append( - f"{action_idx}. **[PLAN]** sql_id `{sid}` has TABLE ACCESS " - f"FULL in plan. Add appropriate index." 
+ # -- High execution count -- + if high_exec: + detail = "Queries with > 1,000 executions (high frequency):\n\n" + id_col = "sql_id" if is_oracle else "queryid" + detail += ( + f"| {id_col} | Calls | Total Elapsed | Avg Elapsed | Query |\n" + f"| --- | --- | --- | --- | --- |\n" + ) + for r in high_exec[:10]: + if is_oracle: + sid = r.get("sql_id", "?") + execs = _safe_int(r.get("executions", 0)) + tot_e = _safe_float(r.get("total_elapsed_sec", 0)) + avg_e = tot_e / max(execs, 1) + sql = _truncate_sql(str(r.get("sql_text", "")), 60) + else: + sid = str(r.get("queryid", "?")) + execs = _safe_int(r.get("calls", 0)) + tot_e = _safe_float(r.get("total_exec_sec", 0)) + avg_e = _safe_float(r.get("mean_exec_sec", 0)) + sql = _truncate_sql(str(r.get("query_text", "")), 60) + detail += ( + f"| `{sid}` | {execs:,} | {_fmt_secs(tot_e)} " + f"| {_fmt_secs(avg_e)} | `{sql}` |\n" + ) + bottlenecks.append((2, "High Execution Count Queries", detail)) + + # -- Full table scans / Sequential scans -- + if fts: + if is_oracle: + detail = "Full table scans detected:\n\n" + detail += ( + "| sql_id | Table | Executions | Elapsed | Query |\n" + "| --- | --- | --- | --- | --- |\n" + ) + for r in fts[:10]: + owner = r.get("object_owner", "") + table = r.get("table_name", "?") + sid = r.get("sql_id", "?") + execs = _safe_int(r.get("executions", 0)) + elapsed = _safe_float(r.get("elapsed_sec", 0)) + sql = _truncate_sql(str(r.get("sql_text", "")), 60) + detail += ( + f"| `{sid}` | `{owner}.{table}` | {execs:,} " + f"| {_fmt_secs(elapsed)} | `{sql}` |\n" + ) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. **Add index** on `{owner}.{table}` " + f"for sql_id `{sid}` (full table scan, " + f"{execs:,} execs, {_fmt_secs(elapsed)})", ) - if has_hash_join: - action_idx += 1 - action_items.append( - f"{action_idx}. **[PLAN]** sql_id `{sid}` uses HASH JOIN. " - f"Ensure join columns are indexed for NESTED LOOPS " - f"if table is small." 
+ ) + else: + detail = "Tables with heavy sequential scans:\n\n" + detail += ( + "| Table | Seq Scans | Rows/Scan | Size | " + "Idx Scans | Live Rows |\n" + "| --- | --- | --- | --- | --- | --- |\n" + ) + for r in fts[:10]: + schema = r.get("schemaname", "public") + table = r.get("relname", "?") + ss = _safe_int(r.get("seq_scan", 0)) + avg_r = _safe_int(r.get("avg_rows_per_seq_scan", 0)) + sz = _safe_float(r.get("table_size_mb", 0)) + idx_s = _safe_int(r.get("idx_scan", 0)) + live = _safe_int(r.get("n_live_tup", 0)) + detail += ( + f"| `{schema}.{table}` | {ss:,} | {avg_r:,} " + f"| {sz:.1f} MB | {idx_s:,} | {live:,} |\n" + ) + if ss > 100 and live > 10000: + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. **Add index** on `{schema}.{table}` -- " + f"{ss:,} seq scans on {live:,} rows ({sz:.1f} MB)", + ) ) - parts.append("") + sev = 1 if len(fts) > 5 else 2 + bottlenecks.append((sev, "Full Table Scans / Sequential Scans", detail)) + risks.append(("Medium-High", "High", "I/O amplification from table scans")) - # --- Oracle Parallel Queries ---------------------------------------------- - if is_oracle: - px_rows = _get_rows(data, "parallel_queries") - if px_rows: - parts.append("## Parallel Queries") - for row in px_rows: - sid = row.get("sql_id", "?") - px = _safe_int(row.get("px_servers", 0)) - elapsed = _safe_float(row.get("elapsed_sec", 0)) - sql_text = str(row.get("sql_text") or "") - parts.append( - f"- **sql_id: `{sid}`** — {px:,} PX servers, {elapsed:.2f}s elapsed" + # -- Contention & locking -- + if contention: + if is_oracle: + detail = "Contention/lock wait events:\n\n" + detail += ( + "| Event | Waits | Time Waited | Avg Wait |\n" + "| --- | --- | --- | --- |\n" + ) + for r in contention[:10]: + event = r.get("event", "?") + waits = _safe_int(r.get("total_waits", 0)) + tw = _safe_float(r.get("time_waited_sec", 0)) + aw = _safe_float(r.get("avg_wait_sec", 0)) + detail += ( + f"| {event} | {waits:,} | {_fmt_secs(tw)} | {_fmt_secs(aw)} |\n" ) - if 
sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") - parts.append("") - - # --- Row Contention & Locking --------------------------------------------- - contention_key = "row_contention" if is_oracle else "lock_waits" - rows = _get_rows(data, contention_key) - parts.append("## Row Contention & Locking") - if not rows: - parts.append("No active contention detected.\n") - else: - parts.append("") - for row in rows: - if is_oracle: - event = row.get("event", "?") - waits = _safe_int(row.get("total_waits", 0)) - waited_sec = _safe_float(row.get("time_waited_sec", 0)) - avg_wait = _safe_float(row.get("avg_wait_sec", 0)) - parts.append( - f"**Event: `{event}`** — {waits:,} waits, " - f"{waited_sec:.2f}s total, avg {avg_wait:.4f}s/wait" + else: + detail = "Active lock waits:\n\n" + detail += ( + "| PID | User | Wait Event | Running | Query |\n" + "| --- | --- | --- | --- | --- |\n" + ) + for r in contention[:10]: + pid = r.get("pid", "?") + user = r.get("usename", "?") + we = f"{r.get('wait_event_type', '')}:{r.get('wait_event', '')}" + dur = _safe_float(r.get("running_sec", 0)) + q = _truncate_sql(str(r.get("query", "")), 60) + detail += f"| {pid} | {user} | {we} | {_fmt_secs(dur)} | `{q}` |\n" + bottlenecks.append((2, "Row Contention & Locking", detail)) + risks.append(("Medium", "High", "Lock escalation / deadlock risk")) + + # -- Wait events -- + if wait_rows and not contention: + detail = "Top wait events:\n\n" + detail += "| Event | Waits | Time Waited |\n| --- | --- | --- |\n" + for r in wait_rows[:10]: + event = r.get("event", r.get("event_name", "?")) + waits = _safe_int(r.get("total_waits", 0)) + tw = _safe_float(r.get("time_waited_sec", 0)) + detail += f"| {event} | {waits:,} | {_fmt_secs(tw)} |\n" + bottlenecks.append((2, "Top Wait Events", detail)) + + # -- Table sizes & bloat (PG) -- + if table_sizes: + detail = "Largest tables:\n\n" + detail += ( + "| Table | Total Size | Table Size | TOAST+Idx | " + "Live Rows | Ins | Upd | Del |\n" + 
"| --- | --- | --- | --- | --- | --- | --- | --- |\n" + ) + for r in table_sizes[:10]: + schema = r.get("schemaname", "public") + table = r.get("relname", "?") + total = _safe_float(r.get("total_size_mb", 0)) + tbl = _safe_float(r.get("table_size_mb", 0)) + toast = _safe_float(r.get("toast_index_size_mb", 0)) + live = _safe_int(r.get("n_live_tup", 0)) + ins = _safe_int(r.get("n_tup_ins", 0)) + upd = _safe_int(r.get("n_tup_upd", 0)) + dele = _safe_int(r.get("n_tup_del", 0)) + total_str = f"{total / 1024:.1f} GB" if total >= 1024 else f"{total:.0f} MB" + tbl_str = f"{tbl / 1024:.1f} GB" if tbl >= 1024 else f"{tbl:.0f} MB" + toast_str = f"{toast / 1024:.1f} GB" if toast >= 1024 else f"{toast:.0f} MB" + detail += ( + f"| `{schema}.{table}` | {total_str} | {tbl_str} " + f"| {toast_str} | {live:,} | {ins:,} | {upd:,} | {dele:,} |\n" + ) + if total > 10240: + risks.append( + ( + "Medium", + "Medium", + f"`{schema}.{table}` is {total_str} -- consider partitioning", + ) ) - if waited_sec > 1: - action_idx += 1 - action_items.append( - f"{action_idx}. **[CONTENTION]** `{event}` — " - f"{waited_sec:.2f}s total. Reduce hot-row updates, " - f"increase INITRANS, or tune locking strategy." + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. **Partition** `{schema}.{table}` " + f"({total_str}) -- time-based or business key", ) - else: - pid = row.get("pid", "?") - user = row.get("usename", "?") - event = row.get("wait_event", "?") - event_type = row.get("wait_event_type", "") - running_sec = _safe_float(row.get("running_sec", 0)) - state = row.get("state", "") - query = str(row.get("query") or "") - parts.append( - f"**PID {pid}** (user: {user}, state: {state}) — " - f"wait: {event_type}/{event}, running {running_sec:.2f}s" ) - if query: - parts.append(f"```sql\n{_truncate_sql(query, 300)}\n```") - if running_sec > 60: - action_idx += 1 - action_items.append( - f"{action_idx}. **[LONG WAIT]** PID {pid} waiting on " - f"{event_type}/{event} for {running_sec:.0f}s. 
" - f"Consider `SELECT pg_cancel_backend({pid});`" + if toast > tbl and toast > 1024: + act_idx += 1 + actions.append( + ( + 2, + f"{act_idx}. Review TOAST usage on `{schema}.{table}` " + f"-- TOAST+Idx ({toast_str}) > table ({tbl_str})", ) - parts.append("") - - # --- Wait Events (Oracle / AWR) ------------------------------------------- - if is_oracle: - wait_rows = _get_rows(data, "wait_events") or _get_rows(data, "awr_wait_events") - if wait_rows: - parts.append("## Top Wait Events") - parts.append("") - for row in wait_rows[:15]: - event = row.get("event", "?") - waits = _safe_int(row.get("total_waits", 0)) - waited = _safe_float(row.get("time_waited_sec", 0)) - avg_w = _safe_float(row.get("avg_wait_sec", 0)) - line = f"- **`{event}`** — {waits:,} waits, {waited:.2f}s total" - if avg_w > 0: - line += f", avg {avg_w:.4f}s" - parts.append(line) - if waited > 60: - action_idx += 1 - action_items.append( - f"{action_idx}. **[WAIT]** `{event}` — " - f"{waited:.2f}s total wait time. " - f"Investigate root cause (I/O, lock, latch)." + ) + bottlenecks.append((2, "Table Sizes & Storage", detail)) + + # -- Bloat (PG) -- + if bloat_rows: + high_bloat = [r for r in bloat_rows if _safe_float(r.get("dead_pct", 0)) > 20] + if high_bloat: + detail = "Tables with significant bloat (dead tuples > 20%):\n\n" + detail += ( + "| Table | Dead % | Dead Tuples | Size | Last Vacuum |\n" + "| --- | --- | --- | --- | --- |\n" + ) + for r in high_bloat[:10]: + schema = r.get("schemaname", "public") + table = r.get("relname", "?") + dp = _safe_float(r.get("dead_pct", 0)) + dead = _safe_int(r.get("n_dead_tup", 0)) + sz = _safe_float(r.get("table_size_mb", 0)) + lv = r.get("last_autovacuum", "never") or "never" + detail += ( + f"| `{schema}.{table}` | {dp:.1f}% | {dead:,} " + f"| {sz:.0f} MB | {lv} |\n" + ) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. 
**VACUUM FULL** `{schema}.{table}` -- " + f"{dp:.1f}% dead tuples ({dead:,}): " + f"`VACUUM (VERBOSE, ANALYZE) {schema}.{table};`", ) - parts.append("") - - # --- pgProfile Wait Events ------------------------------------------------ - if not is_oracle: - pgp_wait_rows = _get_rows(data, "pgprofile_wait_events") - if pgp_wait_rows: - parts.append("## Wait Events (pgProfile)") - for row in pgp_wait_rows[:15]: - etype = row.get("event_type", "?") - event = row.get("event", "?") - waits = _safe_int(row.get("total_waits", 0)) - waited = _safe_float(row.get("total_waited_sec", 0)) - parts.append(f"- **{etype}/{event}** — {waits:,} waits, {waited:.2f}s") - parts.append("") + ) + bottlenecks.append((1, "Table Bloat", detail)) + risks.append(("High", "Severe", "Disk exhaustion from bloat")) - # --- Sequence Caching Issues ----------------------------------------------- - seq_key = "sequence_no_cache" if is_oracle else "sequence_cache_issues" - rows = _get_rows(data, seq_key) - parts.append("## Sequence Caching Issues") - if not rows: - parts.append("No issues found.\n") - else: - parts.append("") - for row in rows: + # -- Stale statistics / missing vacuum -- + stale_critical: list[dict[str, Any]] = [] + if stale_rows: + for r in stale_rows: if is_oracle: - owner = row.get("sequence_owner", "") - name = row.get("sequence_name", "?") - cache = _safe_int(row.get("cache_size", 0)) - parts.append( - f"**`{owner}.{name}`** — cache_size={cache} (should be >= 20)" - ) - action_idx += 1 - action_items.append( - f"{action_idx}. 
**[SEQUENCE]** " - f"`ALTER SEQUENCE {owner}.{name} CACHE 20;`" - ) + stale = r.get("stale_stats", "") + days = _safe_float(r.get("days_since_analyzed", 0)) + if stale == "YES" or days > 7: + stale_critical.append(r) else: - schema = row.get("schemaname", "public") - name = row.get("sequencename", "?") - cache = _safe_int(row.get("cache_size") or 0) - parts.append( - f"**`{schema}.{name}`** — cache_size={cache} (should be >= 20)" + dead_pct = _safe_float(r.get("dead_pct", 0)) + la = r.get("last_analyze") or r.get("last_autoanalyze") + if dead_pct > 10 or not la: + stale_critical.append(r) + if stale_critical: + detail = "Tables with stale/missing statistics:\n\n" + if is_oracle: + detail += ( + "| Table | Rows | Last Analyzed | Days Stale |\n" + "| --- | --- | --- | --- |\n" + ) + for r in stale_critical[:15]: + table = r.get("table_name", "?") + rows = _safe_int(r.get("num_rows", 0)) + la = r.get("last_analyzed", "never") + days = _safe_float(r.get("days_since_analyzed", 0)) + detail += f"| `{table}` | {rows:,} | {la} | {days:.0f} |\n" + act_idx += 1 + actions.append( + ( + 2, + f"{act_idx}. `EXEC DBMS_STATS.GATHER_TABLE_STATS" + f"(ownname=>USER, tabname=>'{table}');`", + ) ) - action_idx += 1 - action_items.append( - f"{action_idx}. **[SEQUENCE]** " - f"`ALTER SEQUENCE {schema}.{name} CACHE 20;`" + else: + detail += ( + "| Table | Dead % | Dead Tuples | Last Analyze |\n" + "| --- | --- | --- | --- |\n" + ) + for r in stale_critical[:15]: + schema = r.get("schemaname", "public") + table = r.get("relname", "?") + dp = _safe_float(r.get("dead_pct", 0)) + dead = _safe_int(r.get("n_dead_tup", 0)) + la = r.get("last_analyze") or r.get("last_autoanalyze") or "never" + detail += f"| `{schema}.{table}` | {dp:.1f}% | {dead:,} | {la} |\n" + act_idx += 1 + actions.append((2, f"{act_idx}. 
`ANALYZE {schema}.{table};`")) + bottlenecks.append((2, "Stale Statistics / Missing Vacuum", detail)) + + # -- Unused indexes -- + if unused_idx: + total_waste_mb = sum(_safe_float(r.get("index_size_mb", 0)) for r in unused_idx) + detail = ( + f"**{len(unused_idx)} unused indexes** " + f"consuming **{total_waste_mb:.0f} MB**:\n\n" + "| Index | Table | Size |\n" + "| --- | --- | --- |\n" + ) + for r in unused_idx[:15]: + if is_oracle: + idx = r.get("index_name", "?") + table = r.get("table_name", "?") + sz = _safe_float(r.get("index_rows", 0)) + detail += f"| `{idx}` | `{table}` | {sz:,} rows |\n" + else: + schema = r.get("schemaname", "public") + idx = r.get("indexrelname", "?") + table = r.get("relname", "?") + sz = _safe_float(r.get("index_size_mb", 0)) + detail += f"| `{schema}.{idx}` | `{table}` | {sz:.0f} MB |\n" + act_idx += 1 + actions.append( + ( + 2, + f"{act_idx}. `DROP INDEX {schema}.{idx};` " + f"-- never used, {sz:.0f} MB", + ) ) - parts.append("") + bottlenecks.append((3, "Unused Indexes", detail)) - # --- Stale Statistics / Vacuum / Bloat ------------------------------------ - if is_oracle: - rows = _get_rows(data, "stale_statistics") - else: - rows = _get_rows(data, "stale_stats_vacuum") + _get_rows(data, "bloat_estimate") - seen_tables: set[str] = set() - deduped: list[dict[str, Any]] = [] - for r in rows: - key = f"{r.get('schemaname', '')}.{r.get('relname', '')}" - if key not in seen_tables: - seen_tables.add(key) - deduped.append(r) - rows = deduped - - parts.append("## Stale Statistics / Vacuum / Bloat") - if not rows: - parts.append("No issues found.\n") - else: - parts.append("") - for row in rows: + # -- Sequence caching -- + if seqs: + detail = "Sequences with no/low caching (cache_size <= 1):\n\n" + detail += "| Sequence | Cache Size |\n| --- | --- |\n" + for r in seqs[:15]: if is_oracle: - table = row.get("table_name", "?") - num_rows = _safe_int(row.get("num_rows", 0)) - stale = row.get("stale_stats", "?") - last_analyzed = 
row.get("last_analyzed", "never") - days = _safe_float(row.get("days_since_analyzed", 0)) - parts.append( - f"**`{table}`** — {num_rows:,} rows, stale={stale}, " - f"last analyzed: {last_analyzed} ({days:.0f} days ago)" - ) - action_idx += 1 - action_items.append( - f"{action_idx}. **[STALE STATS]** " - f"`EXEC DBMS_STATS.GATHER_TABLE_STATS" - f"(ownname=>USER, tabname=>'{table}');`" - ) + name = f"{r.get('sequence_owner', '')}.{r.get('sequence_name', '?')}" + cache = _safe_int(r.get("cache_size", 0)) else: - schema = row.get("schemaname", "public") - table = row.get("relname", "?") - dead = _safe_int(row.get("n_dead_tup", 0)) - live = _safe_int(row.get("n_live_tup", 0)) - dead_pct = _safe_float(row.get("dead_pct", 0)) - last_vac = ( - row.get("last_autovacuum") or row.get("last_vacuum") or "never" - ) - last_analyze = ( - row.get("last_autoanalyze") or row.get("last_analyze") or "never" - ) - parts.append( - f"**`{schema}.{table}`** — {live:,} live, {dead:,} dead " - f"({dead_pct:.1f}% bloat), last vacuum: {last_vac}, " - f"last analyze: {last_analyze}" - ) - if dead_pct > 20 or dead > 50000: - action_idx += 1 - action_items.append( - f"{action_idx}. **[BLOAT]** " - f"`VACUUM ANALYZE {schema}.{table};` " - f"— {dead_pct:.1f}% dead tuples" - ) - elif str(last_analyze) in ("never", "None"): - action_idx += 1 - action_items.append( - f"{action_idx}. 
**[STALE STATS]** " - f"`ANALYZE {schema}.{table};` — never analyzed" - ) - parts.append("") + name = f"{r.get('schemaname', 'public')}.{r.get('sequencename', '?')}" + cache = _safe_int(r.get("cache_size", 0)) + detail += f"| `{name}` | {cache} |\n" + detail += ( + "\n**Fix:** Increase cache size to reduce contention:\n" + "```sql\nALTER SEQUENCE seq_name CACHE 100;\n```" + ) + bottlenecks.append((3, "Sequence Caching Issues", detail)) + + # -- Temp file usage (PG) -- + if temp_sql_rows: + detail = "Queries spilling to temp files:\n\n" + detail += ( + "| queryid | Temp MB | Calls | Elapsed | Query |\n" + "| --- | --- | --- | --- | --- |\n" + ) + for r in temp_sql_rows[:10]: + qid = str(r.get("queryid", "?")) + tmb = _safe_float(r.get("temp_mb", 0)) + calls = _safe_int(r.get("calls", 0)) + elapsed = _safe_float(r.get("total_exec_sec", 0)) + sql = _truncate_sql(str(r.get("query_text", "")), 60) + detail += ( + f"| `{qid}` | {tmb:.1f} | {calls:,} " + f"| {_fmt_secs(elapsed)} | `{sql}` |\n" + ) + detail += "\n**Fix:** Increase `work_mem` or optimise query to reduce sorting." + bottlenecks.append((2, "Temp File Usage", detail)) + act_idx += 1 + actions.append( + ( + 2, + f"{act_idx}. 
Increase `work_mem` -- " + f"{len(temp_sql_rows)} queries spilling to disk", + ) + ) - # --- Unused Indexes ------------------------------------------------------- - rows = _get_rows(data, "unused_indexes") - if rows: - parts.append("## Unused Indexes") - parts.append("") - for row in rows: - schema = row.get("schemaname", "public") - table = row.get("relname", "?") - idx_name = row.get("indexrelname", "?") - size_mb = _safe_float(row.get("index_size_mb", 0)) - parts.append( - f"**`{schema}.{idx_name}`** on `{table}` — {size_mb:.1f} MB, 0 scans" + # -- Checkpoint issues (PG) -- + if ckpt_rows: + ck = ckpt_rows[0] + req = _safe_int(ck.get("checkpoints_req", 0)) + timed = _safe_int(ck.get("checkpoints_timed", 0)) + buffers_ckpt = _safe_int(ck.get("buffers_checkpoint", 0)) + buffers_be = _safe_int(ck.get("buffers_backend", 0)) + backend_pct = 0.0 + if buffers_ckpt + buffers_be > 0: + backend_pct = buffers_be / (buffers_ckpt + buffers_be) * 100 + if req > timed and timed > 0: + detail = ( + f"Requested checkpoints ({req:,}) **exceed** timed " + f"checkpoints ({timed:,})\n\n" + f"Backend write %: {backend_pct:.1f}%\n\n" + "**Fix:** Increase `max_wal_size` and `checkpoint_timeout`." ) - if size_mb > 1: - action_idx += 1 - action_items.append( - f"{action_idx}. **[UNUSED INDEX]** " - f"`DROP INDEX {schema}.{idx_name};` — " - f"{size_mb:.1f} MB wasted" + bottlenecks.append((2, "Checkpoint Pressure", detail)) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. 
Increase `max_wal_size` -- " + f"requested checkpoints ({req:,}) > timed ({timed:,})", ) - parts.append("") - - # --- Table Stats (PostgreSQL) — top tables by activity -------------------- - if not is_oracle: - tbl_rows = _get_rows(data, "table_stats") - if tbl_rows: - parts.append("## Top Tables by Activity") - parts.append("") - for row in tbl_rows[:10]: - schema = row.get("schemaname", "public") - table = row.get("relname", "?") - seq_scan = _safe_int(row.get("seq_scan", 0)) - idx_scan = _safe_int(row.get("idx_scan", 0)) - inserts = _safe_int(row.get("n_tup_ins", 0)) - updates = _safe_int(row.get("n_tup_upd", 0)) - deletes = _safe_int(row.get("n_tup_del", 0)) - live = _safe_int(row.get("n_live_tup", 0)) - dead = _safe_int(row.get("n_dead_tup", 0)) - parts.append( - f"- **`{schema}.{table}`** — seq: {seq_scan:,}, " - f"idx: {idx_scan:,}, ins/upd/del: " - f"{inserts:,}/{updates:,}/{deletes:,}, " - f"live: {live:,}, dead: {dead:,}" + ) + if backend_pct > 20: + detail_be = ( + f"**{backend_pct:.1f}%** of buffers written by backends " + f"(should be < 5%)\n\n" + "**Fix:** Increase `shared_buffers`, tune `bgwriter_*` params." 
+ ) + bottlenecks.append((2, "Backend Buffer Writes", detail_be)) + + # -- Replication lag -- + if repl_rows: + for r in repl_rows: + replay_lag = _safe_float(r.get("replay_lag_sec", 0)) + client = r.get("client_addr", "?") + state = r.get("state", "?") + if replay_lag > 10: + bottlenecks.append( + ( + 1 if replay_lag > 60 else 2, + f"Replication Lag ({client})", + f"Replica `{client}` ({state}): " + f"replay lag = **{_fmt_secs(replay_lag)}**", + ) + ) + risks.append( + ( + "Medium-High", + "High", + f"Replication lag {_fmt_secs(replay_lag)} on {client}", + ) ) - parts.append("") - # --- Checkpoint / WAL Issues (PostgreSQL) --------------------------------- - if not is_oracle: - cp_rows = _get_rows(data, "checkpoint_stats") - parts.append("## Checkpoint / WAL Issues") - has_issue = False - if cp_rows: - row = cp_rows[0] - backend_pct = _safe_float(row.get("backend_write_pct", 0)) - req = _safe_int(row.get("checkpoints_req", 0)) - timed = _safe_int(row.get("checkpoints_timed", 0)) - buf_cp = _safe_int(row.get("buffers_checkpoint", 0)) - buf_clean = _safe_int(row.get("buffers_clean", 0)) - buf_backend = _safe_int(row.get("buffers_backend", 0)) - parts.append( - f"- Checkpoints: {timed:,} timed, {req:,} requested\n" - f"- Buffers: checkpoint={buf_cp:,}, clean={buf_clean:,}, " - f"backend={buf_backend:,}\n" - f"- Backend write %: {backend_pct:.1f}%" + # -- Oracle SGA info -- + sga_rows = _get_rows(data, "sga_info") + if sga_rows: + detail = "SGA Memory Allocation:\n\n" + detail += "| Component | Size |\n| --- | --- |\n" + for r in sga_rows: + name = r.get("name", "?") + sz = _safe_float(r.get("size_mb", 0)) + sz_str = f"{sz / 1024:.1f} GB" if sz >= 1024 else f"{sz:.0f} MB" + detail += f"| {name} | {sz_str} |\n" + bottlenecks.append((3, "SGA Configuration", detail)) + + # -- Oracle tablespace I/O -- + ts_io_rows = _get_rows(data, "tablespace_io") + if ts_io_rows: + detail = "Tablespace I/O:\n\n" + detail += ( + "| Tablespace | Phys Reads | Phys Writes | " + "Read Time | 
Write Time |\n" + "| --- | --- | --- | --- | --- |\n" + ) + for r in ts_io_rows[:10]: + ts = r.get("tablespace_name", "?") + pr = _safe_int(r.get("physical_reads", 0)) + pw = _safe_int(r.get("physical_writes", 0)) + rt = _safe_float(r.get("read_time_sec", 0)) + wt = _safe_float(r.get("write_time_sec", 0)) + detail += ( + f"| {ts} | {pr:,} | {pw:,} | {_fmt_secs(rt)} | {_fmt_secs(wt)} |\n" ) - if backend_pct > 10: - has_issue = True - action_idx += 1 - action_items.append( - f"{action_idx}. **[CHECKPOINT]** Backend writes are " - f"{backend_pct:.1f}% of total — increase " - f"`shared_buffers` and `checkpoint_completion_target`." + bottlenecks.append((3, "Tablespace I/O", detail)) + + # -- Oracle redo log switches -- + redo_rows = _get_rows(data, "redo_log_switches") + if redo_rows: + max_switches = max(_safe_int(r.get("switches", 0)) for r in redo_rows) + if max_switches > 10: + detail = "Redo log switches per hour:\n\n" + detail += "| Hour | Switches |\n| --- | --- |\n" + for r in redo_rows[:12]: + detail += ( + f"| {r.get('switch_hour', '?')} " + f"| {_safe_int(r.get('switches', 0))} |\n" ) - if req > timed and timed > 0: - has_issue = True - action_idx += 1 - action_items.append( - f"{action_idx}. **[CHECKPOINT]** More requested ({req:,}) " - f"than timed ({timed:,}) checkpoints — increase " - f"`max_wal_size`." + detail += ( + f"\nPeak: **{max_switches} switches/hour** -- " + f"consider increasing redo log size." + ) + bottlenecks.append((2, "High Redo Log Switches", detail)) + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. 
Increase redo log size -- " + f"peak {max_switches} switches/hour", ) - if not has_issue: - parts.append("No issues found.") - parts.append("") + ) - # --- Temp File Usage (PostgreSQL) ----------------------------------------- - if not is_oracle: - rows = _get_rows(data, "temp_file_usage") - if rows: - parts.append("## Temp File Usage") - parts.append("") - for row in rows[:10]: - sid = row.get("queryid", "?") - temp_mb = _safe_float(row.get("temp_mb", 0)) - calls = _safe_int(row.get("calls", 0)) - sql_text = str(row.get("query_text") or "") - parts.append( - f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp, {calls:,} calls" - ) - if sql_text: - parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```") - if temp_mb > 100: - action_idx += 1 - action_items.append( - f"{action_idx}. **[TEMP FILES]** queryid `{sid}` uses " - f"{temp_mb:.1f} MB temp. Increase `work_mem` or " - f"optimize sort/join." + # -- Oracle temp usage -- + temp_rows = _get_rows(data, "temp_usage") + if temp_rows: + for r in temp_rows: + pct = _safe_float(r.get("pct_used", 0)) + if pct > 80: + ts = r.get("tablespace_name", "?") + used = _safe_float(r.get("used_mb", 0)) + free = _safe_float(r.get("free_mb", 0)) + bottlenecks.append( + ( + 2, + f"Temp Tablespace `{ts}` at {pct:.0f}%", + f"Used: {used:.0f} MB, Free: {free:.0f} MB", ) - parts.append("") + ) + risks.append(("Medium", "High", f"Temp space exhaustion on {ts}")) # ===================================================================== - # EXECUTIVE SUMMARY & ACTION PLAN + # PHASE 3 -- Generate formatted report # ===================================================================== - summary_parts: list[str] = [] - high_elapsed = _get_rows(data, "high_elapsed_per_exec") - high_exec = _get_rows(data, "high_execution_count") - fts = _get_rows(data, "full_table_scans" if is_oracle else "seq_scan_tables") - contention = _get_rows(data, "row_contention" if is_oracle else "lock_waits") - seqs = _get_rows( - data, "sequence_no_cache" if is_oracle 
else "sequence_cache_issues" - ) - top_sql = ( - _get_rows(data, "top_cpu_sql" if is_oracle else "top_cpu_queries") - or _get_rows(data, "awr_top_sql") - or _get_rows(data, "pgprofile_top_sql") - ) + parts: list[str] = [] - if top_sql: - summary_parts.append(f"{len(top_sql)} top SQL statements analysed") - if high_elapsed: - summary_parts.append( - f"{len(high_elapsed)} queries with high elapsed time per execution" + # --- Header --- + parts.append(f"# Performance Analysis Report -- {db_type.upper()}") + parts.append("*Programmatic analysis v2 -- no LLM involved*\n") + parts.append("---\n") + + # --- 1. Executive Summary --- + sev1 = [b for b in bottlenecks if b[0] == 1] + sev2 = [b for b in bottlenecks if b[0] == 2] + sev3 = [b for b in bottlenecks if b[0] == 3] + + if sev1: + health = "CRITICAL -- immediate action required" + elif sev2: + health = "WARNING -- important issues found" + elif sev3: + health = "ADVISORY -- minor improvements possible" + else: + health = "HEALTHY -- no significant issues detected" + + parts.append("## 1. 
Executive Summary\n") + parts.append(f"**Overall health:** {health}\n") + + headlines: list[str] = [] + if not is_oracle: + headlines.append(f"Buffer cache hit ratio: **{cache_hit:.2f}%**") + headlines.append(f"Active backends: **{backends}**") + headlines.append( + f"Transactions: **{commits:,}** commits, **{rollbacks:,}** rollbacks" ) - if high_exec: - summary_parts.append( - f"{len(high_exec)} queries with very high execution counts" + if wal_bytes > 0: + headlines.append(f"WAL generated: **{_fmt_bytes(wal_bytes)}**") + if temp_bytes > 0: + headlines.append( + f"Temp files: **{temp_files:,}** files, **{_fmt_bytes(temp_bytes)}**" + ) + else: + headlines.append(f"Buffer cache hit ratio: **{ora_cache_hit:.2f}%**") + headlines.append(f"Hard parse ratio: **{ora_hard_parse_pct:.1f}%**") + if ora_commit_count + ora_rb_count > 0: + headlines.append( + f"Transactions: **{ora_commit_count:,}** commits, " + f"**{ora_rb_count:,}** rollbacks" + ) + headlines.append( + f"Issues found: **{len(sev1)}** critical, " + f"**{len(sev2)}** important, **{len(sev3)}** advisory" + ) + for h in headlines: + parts.append(f"- {h}") + parts.append("") + + # --- 2. Database & Workload Distribution --- + parts.append("## 2. 
Database & Workload Overview\n") + if not is_oracle: + parts.append( + "| Metric | Value |\n" + "| --- | --- |\n" + f"| Cache hit ratio | {cache_hit:.2f}% |\n" + f"| Active backends | {backends} |\n" + f"| Commits | {commits:,} |\n" + f"| Rollbacks | {rollbacks:,} |\n" + f"| Blocks hit | {blks_hit:,} |\n" + f"| Blocks read (disk) | {blks_read:,} |\n" + f"| Temp files | {temp_files:,} |\n" + f"| Temp bytes | {_fmt_bytes(temp_bytes)} |" ) - if fts: - summary_parts.append( - f"{len(fts)} " - f"{'full table scans' if is_oracle else 'tables with heavy seq scans'}" + if wal_bytes > 0: + parts.append( + f"| WAL generated | {_fmt_bytes(wal_bytes)} |\n" + f"| WAL FPI count | {wal_fpi:,} |\n" + f"| WAL sync time | {wal_sync_time_ms / 1000:.1f} sec |\n" + f"| WAL write time | {wal_write_time_ms / 1000:.1f} sec |" + ) + else: + parts.append( + "| Metric | Value |\n" + "| --- | --- |\n" + f"| Buffer cache hit | {ora_cache_hit:.2f}% |\n" + f"| Hard parse ratio | {ora_hard_parse_pct:.1f}% |\n" + f"| Disk sort ratio | {ora_disk_sort_pct:.1f}% |\n" + f"| Commits | {ora_commit_count:,} |\n" + f"| Rollbacks | {ora_rb_count:,} |" ) - if contention: - summary_parts.append(f"{len(contention)} contention/lock wait events") - if seqs: - summary_parts.append(f"{len(seqs)} sequences with no/low caching") + parts.append("") - exec_summary = ( - "Found: " + "; ".join(summary_parts) + "." - if summary_parts - else "No significant performance issues detected in the collected data." - ) + # Connection distribution + if conn_rows: + parts.append("**Connection Distribution:**\n") + parts.append("| State | Count | Wait Type |\n| --- | --- | --- |") + for r in conn_rows: + state = r.get("state", "unknown") or "null" + count = _safe_int(r.get("count", 0)) + wtype = r.get("wait_event_type", "None") + parts.append(f"| {state} | {count} | {wtype} |") + if idle_count > 50: + act_idx += 1 + actions.append( + ( + 1, + f"{act_idx}. 
Use connection pooling (PgBouncer) -- " + f"{idle_count} idle connections", + ) + ) + parts.append("") - # Build final report: summary at top, then sections, then action plan - header = [f"## Executive Summary\n{exec_summary}\n"] - footer = ["\n## Action Plan (Priority Order)\n"] - if action_items: - footer.extend(action_items) + # --- 3. Top Bottlenecks --- + parts.append("## 3. Top Bottlenecks\n") + if not bottlenecks: + parts.append( + "No significant performance bottlenecks detected in the collected data.\n" + ) else: - footer.append( - "No critical action items — database appears healthy based " - "on collected data." + sev_label = { + 1: "SEV-1 (Critical)", + 2: "SEV-2 (Important)", + 3: "SEV-3 (Advisory)", + } + sev_emoji = {1: "SEV-1", 2: "SEV-2", 3: "SEV-3"} + bn_idx = 0 + for sev in (1, 2, 3): + group = [b for b in bottlenecks if b[0] == sev] + if not group: + continue + for _, title, detail in group: + bn_idx += 1 + parts.append( + f"### {sev_emoji.get(sev, '')} {bn_idx}. " + f"{title} ({sev_label[sev]})\n" + ) + parts.append(detail) + parts.append("") + + # --- 4. Configuration Review --- + section_num = 4 + if config_rows: + parts.append(f"## {section_num}. 
Configuration Review\n") + if is_oracle: + parts.append("| Parameter | Value | Description |\n| --- | --- | --- |") + for r in config_rows: + name = r.get("name", "?") + val = r.get("value", "?") + desc = _truncate_sql(str(r.get("description", "")), 80) + parts.append(f"| `{name}` | `{val}` | {desc} |") + else: + parts.append("| Parameter | Value | Unit |\n| --- | --- | --- |") + risky_params: dict[str, str] = {} + for r in config_rows: + name = r.get("name", "?") + val = r.get("setting", "?") + unit = r.get("unit", "") or "" + parts.append(f"| `{name}` | `{val}` | {unit} |") + if name == "statement_timeout" and str(val) == "0": + risky_params[name] = ( + "No statement timeout -- risk of runaway queries" + ) + if name == "idle_in_transaction_session_timeout" and str(val) == "0": + risky_params[name] = "No idle-in-tx timeout -- risk of bloat" + if name == "max_connections": + max_conn = _safe_int(val) + if max_conn > 500: + risky_params[name] = ( + f"max_connections={max_conn} is high -- " + f"use connection pooling" + ) + if risky_params: + parts.append("\n**Risks:**") + for param, msg in risky_params.items(): + parts.append(f"- `{param}`: {msg}") + risks.append(("Medium", "Medium", msg)) + parts.append("") + section_num += 1 + + # --- 5. Risk Register --- + if risks: + parts.append(f"## {section_num}. Risk Register\n") + parts.append("| Risk | Likelihood | Impact |\n| --- | --- | --- |") + seen_risks: set[str] = set() + for likelihood, impact, desc in risks: + if desc not in seen_risks: + seen_risks.add(desc) + parts.append(f"| {desc} | {likelihood} | {impact} |") + parts.append("") + section_num += 1 + + # --- 6. Prioritised Action Plan --- + parts.append(f"## {section_num}. Prioritised Action Plan\n") + if not actions: + parts.append( + "No critical action items -- database appears healthy " + "based on collected data." 
) + else: + p0 = [a for a in actions if a[0] == 0] + p1 = [a for a in actions if a[0] == 1] + p2 = [a for a in actions if a[0] == 2] + if p0: + parts.append("### Priority 0 -- Immediate (this sprint)\n") + for _, text in p0: + parts.append(text) + parts.append("") + if p1: + parts.append("### Priority 1 -- Structural\n") + for _, text in p1: + parts.append(text) + parts.append("") + if p2: + parts.append("### Priority 2 -- Performance Hygiene\n") + for _, text in p2: + parts.append(text) + parts.append("") - return "\n".join(header + parts + footer) + return "\n".join(parts) def _get_rows(data: dict[str, Any], key: str) -> list[dict[str, Any]]: @@ -1752,9 +2178,9 @@ def analyse_awr_snaps(self, begin_snap: int, end_snap: int) -> dict[str, Any]: def analyse_uploaded_report( self, file_content: str, file_name: str ) -> dict[str, Any]: - """Parse an uploaded report file and generate LLM analysis.""" + """Parse an uploaded report file and display it.""" parsed = parse_uploaded_report(file_content, file_name) - return self._run_llm_analysis_from_text(parsed) + return self._run_uploaded_report_analysis(parsed) def list_awr_snapshots(self) -> list[dict[str, Any]]: """Return available AWR snapshots from DBA_HIST_SNAPSHOT.""" @@ -1804,23 +2230,19 @@ def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]: "analysis": findings_report, } - def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]: - # For uploaded reports, we still need the LLM since we don't - # have structured data — but we keep the prompt minimal. - llm_prompt = ( - report_text + "\n\n---\n" - "Summarise the key performance issues in the report above. " - "Only reference data that actually appears above. " - "Do NOT invent sql_ids, table names, or metrics." 
- ) - try: - llm_response = self.llm_client.generate(prompt=llm_prompt) - except (ConnectionError, RuntimeError) as exc: - llm_response = f"LLM analysis failed: {exc}" + def _run_uploaded_report_analysis(self, report_text: str) -> dict[str, Any]: + # For uploaded reports we cannot do structured analysis. + # Display the parsed text as-is — no LLM involved. return { "raw_data": {}, "report_text": report_text, - "analysis": llm_response, + "analysis": ( + "## Uploaded Report\n\n" + "The parsed report content is shown below. " + "For detailed programmatic analysis, use **Live** mode " + "which queries the database directly.\n\n" + "---\n\n" + report_text[:8000] + ), } # -- Oracle collection --------------------------------------------------- @@ -1844,6 +2266,8 @@ def _collect_oracle(self) -> dict[str, Any]: "redo_log_switches": _ORA_REDO_LOG_SWITCHES, "temp_usage": _ORA_TEMP_USAGE, "parallel_queries": _ORA_PARALLEL_QUERIES, + "config_params": _ORA_CONFIG_PARAMS, + "idle_sessions": _ORA_IDLE_SESSIONS, } for name, sql in queries.items(): result = self.db_client.execute_query(sql) @@ -1958,6 +2382,7 @@ def _collect_postgresql(self) -> dict[str, Any]: "existing_indexes": _PG_EXISTING_INDEXES, "stale_stats_vacuum": _PG_STALE_STATS, "table_stats": _PG_TABLE_STATS, + "table_sizes": _PG_TABLE_SIZES, "database_stats": _PG_DB_STATS, "bgwriter_stats": bgwriter_sql, "unused_indexes": _PG_UNUSED_INDEXES, @@ -1967,7 +2392,13 @@ def _collect_postgresql(self) -> dict[str, Any]: "temp_file_usage": _PG_TEMP_FILE_USAGE, "connection_stats": _PG_CONNECTION_STATS, "checkpoint_stats": checkpoint_sql, + "idle_in_transaction": _PG_IDLE_IN_TRANSACTION, + "config_params": _PG_CONFIG_PARAMS, + "replication_status": _PG_REPLICATION_STATUS, } + # WAL stats only available in PG 14+ + if pg_major >= 14: + queries["wal_stats"] = _PG_WAL_STATS for name, sql in queries.items(): result = self.db_client.execute_query(sql) if "error" in result: diff --git a/tools/pg-assistant/snapshot_compare.py 
b/tools/pg-assistant/snapshot_compare.py index e517384..f035cc7 100644 --- a/tools/pg-assistant/snapshot_compare.py +++ b/tools/pg-assistant/snapshot_compare.py @@ -814,60 +814,6 @@ def _delta_row( "change_pct": f"{direction}{abs(pct):.1f}%", } - # -- LLM comparison analysis --------------------------------------------- - - def _format_comparison_text( - self, - data_a: dict[str, Any], - data_b: dict[str, Any], - label_a: str, - label_b: str, - delta_table: list[dict[str, Any]], - ) -> str: - parts = [ - f"SNAPSHOT COMPARISON REPORT\n{'=' * 60}", - f"Snapshot A: {label_a}", - f"Snapshot B: {label_b}\n", - "--- DELTA SUMMARY ---", - ] - for row in delta_table: - parts.append( - f" {row['metric']}: {row[label_a]} -> {row[label_b]} " - f"(delta={row['delta']}, {row['change_pct']})" - ) - - parts.append("\n--- SNAPSHOT A: TOP SQL ---") - for i, row in enumerate(data_a.get("top_sql", [])[:10], 1): - parts.append(f" [{i}] {_fmt(row)}") - - parts.append("\n--- SNAPSHOT B: TOP SQL ---") - for i, row in enumerate(data_b.get("top_sql", [])[:10], 1): - parts.append(f" [{i}] {_fmt(row)}") - - parts.append("\n--- SNAPSHOT A: WAIT EVENTS ---") - for i, row in enumerate(data_a.get("wait_events", [])[:10], 1): - parts.append(f" [{i}] {_fmt(row)}") - - parts.append("\n--- SNAPSHOT B: WAIT EVENTS ---") - for i, row in enumerate(data_b.get("wait_events", [])[:10], 1): - parts.append(f" [{i}] {_fmt(row)}") - - return "\n".join(parts) - - def _get_llm_comparison(self, text: str) -> str: - # Build programmatic comparison findings first, then ask LLM - # for a brief summary only. - try: - llm_prompt = ( - text + "\n\n---\n" - "Based on the snapshot comparison data above, write 3-5 sentences " - "summarising what changed and what the DBA should investigate. " - "Do NOT invent any sql_ids, table names, or metrics." 
- ) - return self.llm.generate(prompt=llm_prompt) - except (ConnectionError, RuntimeError) as exc: - return f"LLM comparison summary unavailable: {exc}" - def _build_programmatic_comparison( self, data_a: dict[str, Any], From f43ffa43dd6d95fbe5e253d9e9e579ae304a4fb6 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 6 Apr 2026 12:26:19 +0000 Subject: [PATCH 19/19] Add structured HTML/CSV parsers for uploaded report analysis (pgProfile, AWR, pg_stat_statements) --- tools/pg-assistant/auto_analyse.py | 790 ++++++++++++++++++++++++++++- 1 file changed, 782 insertions(+), 8 deletions(-) diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py index 004b351..7b09b3a 100644 --- a/tools/pg-assistant/auto_analyse.py +++ b/tools/pg-assistant/auto_analyse.py @@ -10,6 +10,7 @@ import io import logging import re +from html.parser import HTMLParser from typing import Any from db_client import BaseDBClient, DB_TYPE_ORACLE, DB_TYPE_POSTGRESQL @@ -2178,9 +2179,8 @@ def analyse_awr_snaps(self, begin_snap: int, end_snap: int) -> dict[str, Any]: def analyse_uploaded_report( self, file_content: str, file_name: str ) -> dict[str, Any]: - """Parse an uploaded report file and display it.""" - parsed = parse_uploaded_report(file_content, file_name) - return self._run_uploaded_report_analysis(parsed) + """Parse an uploaded report file and run programmatic analysis.""" + return self._run_uploaded_report_analysis(file_content, file_name) def list_awr_snapshots(self) -> list[dict[str, Any]]: """Return available AWR snapshots from DBA_HIST_SNAPSHOT.""" @@ -2230,18 +2230,31 @@ def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]: "analysis": findings_report, } - def _run_uploaded_report_analysis(self, report_text: str) -> dict[str, Any]: - # For uploaded reports we cannot do structured analysis. - # Display the parsed text as-is — no LLM involved. 
+ def _run_uploaded_report_analysis( + self, file_content: str, file_name: str + ) -> dict[str, Any]: + # Try to parse into structured data for programmatic analysis. + structured = parse_uploaded_report_structured(file_content, file_name) + if structured: + findings_report = _build_findings_report(structured) + report_text = self._format_report(structured) + return { + "raw_data": structured, + "report_text": report_text, + "analysis": findings_report, + } + # Fallback: display parsed text as-is + parsed_text = parse_uploaded_report(file_content, file_name) return { "raw_data": {}, - "report_text": report_text, + "report_text": parsed_text, "analysis": ( "## Uploaded Report\n\n" + "Could not extract structured data from this report format. " "The parsed report content is shown below. " "For detailed programmatic analysis, use **Live** mode " "which queries the database directly.\n\n" - "---\n\n" + report_text[:8000] + "---\n\n" + parsed_text[:8000] ), } @@ -2576,3 +2589,764 @@ def _parse_text_report(content: str, file_name: str) -> str: parts.append(content) return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# Structured report parsing — extract data into dict for _build_findings_report +# --------------------------------------------------------------------------- + + +class _HTMLTableExtractor(HTMLParser): + """Extract all HTML tables as list of list-of-dicts (header→value).""" + + def __init__(self) -> None: + super().__init__() + self.tables: list[list[dict[str, str]]] = [] + self._in_table = False + self._in_thead = False + self._in_row = False + self._in_cell = False + self._headers: list[str] = [] + self._current_row: list[str] = [] + self._current_rows: list[list[str]] = [] + self._cell_text = "" + self._current_headers: list[str] = [] + # Track section headers (h1-h4, caption) preceding each table + self._section_headers: list[str] = [] + self._last_heading = "" + self._in_heading = False + 
self._heading_text = "" + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: + tag = tag.lower() + if tag == "table": + self._in_table = True + self._headers = [] + self._current_rows = [] + self._current_headers = [] + elif tag == "thead": + self._in_thead = True + elif tag == "tr": + self._in_row = True + self._current_row = [] + elif tag in ("td", "th"): + self._in_cell = True + self._cell_text = "" + elif tag in ("h1", "h2", "h3", "h4", "caption"): + self._in_heading = True + self._heading_text = "" + + def handle_endtag(self, tag: str) -> None: + tag = tag.lower() + if tag == "table": + self._in_table = False + if self._current_headers and self._current_rows: + rows = [] + for raw in self._current_rows: + row_dict: dict[str, str] = {} + for i, hdr in enumerate(self._current_headers): + row_dict[hdr] = raw[i] if i < len(raw) else "" + rows.append(row_dict) + self.tables.append(rows) + self._section_headers.append(self._last_heading) + elif tag == "thead": + self._in_thead = False + elif tag == "tr": + self._in_row = False + if self._in_thead or (not self._current_headers and self._current_row): + self._current_headers = [c.strip().lower() for c in self._current_row] + elif self._current_headers: + self._current_rows.append(self._current_row) + elif tag in ("td", "th"): + self._in_cell = False + self._current_row.append(self._cell_text.strip()) + elif tag in ("h1", "h2", "h3", "h4", "caption"): + self._in_heading = False + self._last_heading = self._heading_text.strip().lower() + + def handle_data(self, data: str) -> None: + if self._in_cell: + self._cell_text += data + if self._in_heading: + self._heading_text += data + + +def _extract_html_tables( + html: str, +) -> list[tuple[str, list[dict[str, str]]]]: + """Return list of (section_heading, rows) from HTML tables.""" + parser = _HTMLTableExtractor() + parser.feed(html) + result: list[tuple[str, list[dict[str, str]]]] = [] + for i, table_rows in enumerate(parser.tables): + 
heading = parser._section_headers[i] if i < len(parser._section_headers) else "" + result.append((heading, table_rows)) + return result + + +def _match_heading(heading: str, *keywords: str) -> bool: + """Check if heading contains ALL given keywords (case-insensitive).""" + h = heading.lower() + return all(k in h for k in keywords) + + +def _parse_pgprofile_structured(html: str) -> dict[str, Any] | None: + """Parse pgProfile HTML report into structured dict for analysis.""" + tables = _extract_html_tables(html) + if not tables: + return None + + sections: dict[str, Any] = {"db_type": DB_TYPE_POSTGRESQL} + found_any = False + + for heading, rows in tables: + if not rows: + continue + + # --- Top SQL by elapsed time --- + if _match_heading(heading, "sql", "elapsed") or _match_heading( + heading, "top", "elapsed" + ): + mapped = [] + for r in rows[:20]: + mapped.append( + { + "queryid": r.get("queryid", r.get("query id", "")), + "query_text": r.get( + "query text", + r.get("query", r.get("sql text", "")), + ), + "total_exec_sec": _safe_float( + r.get( + "total elapsed", + r.get( + "elapsed", + r.get("total_time", r.get("total time", 0)), + ), + ) + ), + "calls": _safe_int(r.get("calls", r.get("executions", 0))), + "mean_exec_sec": _safe_float( + r.get( + "mean elapsed", + r.get("mean_time", r.get("mean time", 0)), + ) + ), + "shared_blks_hit": _safe_int( + r.get( + "shared_blks_hit", + r.get("shared blks hit", 0), + ) + ), + "shared_blks_read": _safe_int( + r.get( + "shared_blks_read", + r.get("shared blks read", 0), + ) + ), + } + ) + if mapped: + sections["top_queries"] = mapped + found_any = True + + # --- Top SQL by executions --- + elif _match_heading(heading, "sql", "execution") or _match_heading( + heading, "top", "execution" + ): + mapped = [] + for r in rows[:20]: + mapped.append( + { + "queryid": r.get("queryid", r.get("query id", "")), + "query_text": r.get( + "query text", + r.get("query", r.get("sql text", "")), + ), + "calls": _safe_int(r.get("calls", 
def _parse_pgprofile_structured(html: str) -> dict[str, Any] | None:
    """Parse a pgProfile HTML report into a structured dict for analysis.

    Every table extracted from ``html`` is classified by its section heading
    and mapped onto one of the result keys: ``top_queries``,
    ``high_execution_count``, ``top_cpu_queries``, ``high_elapsed_per_exec``,
    ``temp_file_usage``, ``seq_scan_tables``, ``table_sizes``,
    ``wait_events``, ``bloat_estimate`` or ``database_stats``. At most the
    first 20 rows of a table are used (only the first row for database
    statistics). Column values are looked up under several alternative
    header spellings.

    Returns ``None`` when no tables are found or no table matches a known
    section; otherwise the dict, which always carries ``db_type``.
    """
    extracted = _extract_html_tables(html)
    if not extracted:
        return None

    def pick(row: dict[str, str], *names: str, default: Any) -> Any:
        """Return the value of the first key of ``names`` present in ``row``."""
        for name in names:
            if name in row:
                return row[name]
        return default

    # Lookups shared by several sections.
    def query_id(row: dict[str, str]) -> Any:
        return pick(row, "queryid", "query id", default="")

    def query_text(row: dict[str, str]) -> Any:
        return pick(row, "query text", "query", "sql text", default="")

    def call_count(row: dict[str, str]) -> Any:
        return _safe_int(pick(row, "calls", "executions", default=0))

    def total_elapsed(row: dict[str, str]) -> Any:
        return _safe_float(
            pick(row, "total elapsed", "total_time", "total time", default=0)
        )

    def mean_elapsed(row: dict[str, str]) -> Any:
        return _safe_float(
            pick(row, "mean elapsed", "mean_time", "mean time", default=0)
        )

    def schema_name(row: dict[str, str]) -> Any:
        return pick(row, "schema", "schemaname", default="public")

    def relation_name(row: dict[str, str]) -> Any:
        return pick(row, "table", "relname", "relation", default="")

    def live_tuples(row: dict[str, str]) -> Any:
        return _safe_int(pick(row, "live", "n_live_tup", "live tuples", default=0))

    def dead_tuples(row: dict[str, str]) -> Any:
        return _safe_int(pick(row, "dead", "n_dead_tup", "dead tuples", default=0))

    def relation_size(row: dict[str, str]) -> Any:
        return _safe_float(pick(row, "size", "table size", default=0))

    report: dict[str, Any] = {"db_type": DB_TYPE_POSTGRESQL}
    matched = False

    for heading, rows in extracted:
        if not rows:
            continue

        head = rows[:20]
        key = ""
        items: list[dict[str, Any]] = []

        if _match_heading(heading, "sql", "elapsed") or _match_heading(
            heading, "top", "elapsed"
        ):
            # Top SQL by elapsed time
            key = "top_queries"
            items = [
                {
                    "queryid": query_id(r),
                    "query_text": query_text(r),
                    "total_exec_sec": _safe_float(
                        pick(
                            r,
                            "total elapsed",
                            "elapsed",
                            "total_time",
                            "total time",
                            default=0,
                        )
                    ),
                    "calls": call_count(r),
                    "mean_exec_sec": mean_elapsed(r),
                    "shared_blks_hit": _safe_int(
                        pick(r, "shared_blks_hit", "shared blks hit", default=0)
                    ),
                    "shared_blks_read": _safe_int(
                        pick(r, "shared_blks_read", "shared blks read", default=0)
                    ),
                }
                for r in head
            ]

        elif _match_heading(heading, "sql", "execution") or _match_heading(
            heading, "top", "execution"
        ):
            # Top SQL by executions
            key = "high_execution_count"
            items = [
                {
                    "queryid": query_id(r),
                    "query_text": query_text(r),
                    "calls": call_count(r),
                    "total_exec_sec": total_elapsed(r),
                    "mean_exec_sec": mean_elapsed(r),
                }
                for r in head
            ]

        elif _match_heading(heading, "sql", "read") or _match_heading(
            heading, "sql", "i/o"
        ):
            # Top SQL by I/O / reads
            key = "top_cpu_queries"
            items = [
                {
                    "queryid": query_id(r),
                    "query_text": query_text(r),
                    "total_exec_sec": total_elapsed(r),
                    "calls": call_count(r),
                    "shared_blks_read": _safe_int(
                        pick(
                            r,
                            "reads",
                            "shared_blks_read",
                            "shared blks read",
                            default=0,
                        )
                    ),
                }
                for r in head
            ]

        elif _match_heading(heading, "sql", "plan"):
            # Top SQL by planning time, mapped to high_elapsed_per_exec.
            key = "high_elapsed_per_exec"
            for r in head:
                mean_plan = _safe_float(
                    pick(
                        r,
                        "mean plan",
                        "mean_plan_time",
                        "mean plan time",
                        default=0,
                    )
                )
                if not mean_plan > 0.001:
                    continue
                items.append(
                    {
                        "queryid": query_id(r),
                        "query_text": query_text(r),
                        "avg_elapsed_sec": mean_plan,
                        "total_exec_sec": _safe_float(
                            pick(
                                r,
                                "total plan",
                                "total_plan_time",
                                "total plan time",
                                default=0,
                            )
                        ),
                        "calls": call_count(r),
                    }
                )

        elif _match_heading(heading, "sql", "temp") or _match_heading(
            heading, "temp", "file"
        ):
            # Top SQL by temp usage
            key = "temp_file_usage"
            for r in head:
                temp_amount = _safe_float(
                    pick(
                        r,
                        "temp",
                        "temp_blks_written",
                        "temp blks written",
                        default=0,
                    )
                )
                if not temp_amount > 0:
                    continue
                items.append(
                    {
                        "queryid": query_id(r),
                        "query_text": query_text(r),
                        "temp_mb": temp_amount,
                        "calls": call_count(r),
                        "total_exec_sec": total_elapsed(r),
                    }
                )

        elif _match_heading(heading, "table", "seq") or _match_heading(
            heading, "sequential scan"
        ):
            # Top tables by sequential scans
            key = "seq_scan_tables"
            items = [
                {
                    "schemaname": schema_name(r),
                    "relname": relation_name(r),
                    "seq_scan": _safe_int(pick(r, "seq scan", "seq_scan", default=0)),
                    "idx_scan": _safe_int(pick(r, "idx scan", "idx_scan", default=0)),
                    "n_live_tup": live_tuples(r),
                    "table_size_mb": relation_size(r),
                }
                for r in head
            ]

        elif _match_heading(heading, "table", "dml") or _match_heading(
            heading, "table", "insert"
        ):
            # Top tables by DML (inserts + updates + deletes)
            key = "table_sizes"
            items = [
                {
                    "schemaname": schema_name(r),
                    "relname": relation_name(r),
                    "total_size_mb": _safe_float(
                        pick(r, "size", "table size", "total_size_mb", default=0)
                    ),
                    "table_size_mb": _safe_float(
                        pick(r, "table size", "table_size_mb", default=0)
                    ),
                    "n_live_tup": live_tuples(r),
                    "n_tup_ins": _safe_int(
                        pick(r, "ins", "n_tup_ins", "inserts", default=0)
                    ),
                    "n_tup_upd": _safe_int(
                        pick(r, "upd", "n_tup_upd", "updates", default=0)
                    ),
                    "n_tup_del": _safe_int(
                        pick(r, "del", "n_tup_del", "deletes", default=0)
                    ),
                    "n_dead_tup": dead_tuples(r),
                }
                for r in head
            ]

        elif _match_heading(heading, "wait") and not _match_heading(heading, "sql"):
            # Wait events
            key = "wait_events"
            items = [
                {
                    "event": pick(r, "event", "wait event", "event_name", default=""),
                    "total_waits": _safe_int(
                        pick(r, "waits", "total_waits", "count", default=0)
                    ),
                    "time_waited_sec": _safe_float(
                        pick(r, "waited", "time_waited", "time waited", default=0)
                    ),
                }
                for r in head
            ]

        elif _match_heading(heading, "vacuum") or _match_heading(heading, "dead"):
            # Vacuum / dead tuples: keep only rows that report dead tuples.
            key = "bloat_estimate"
            for r in head:
                dead_pct = _safe_float(pick(r, "dead_pct", "dead %", default=0))
                dead_count = dead_tuples(r)
                if not (dead_count > 0 or dead_pct > 0):
                    continue
                items.append(
                    {
                        "schemaname": schema_name(r),
                        "relname": relation_name(r),
                        "dead_pct": dead_pct,
                        "n_dead_tup": dead_count,
                        "table_size_mb": relation_size(r),
                        "last_autovacuum": pick(
                            r, "last autovacuum", "last_autovacuum", default=""
                        ),
                    }
                )

        elif _match_heading(heading, "database", "stat"):
            # Database statistics: only the first row is used.
            key = "database_stats"
            first = rows[0]
            items = [
                {
                    "cache_hit_pct": _safe_float(
                        pick(
                            first,
                            "hit ratio",
                            "cache_hit_pct",
                            "blks_hit_%",
                            default=100,
                        )
                    ),
                    "xact_commit": _safe_int(
                        pick(first, "commits", "xact_commit", "xact commit", default=0)
                    ),
                    "xact_rollback": _safe_int(
                        pick(
                            first,
                            "rollbacks",
                            "xact_rollback",
                            "xact rollback",
                            default=0,
                        )
                    ),
                    "numbackends": _safe_int(
                        pick(first, "backends", "numbackends", "connections", default=0)
                    ),
                    "temp_bytes": _safe_int(
                        pick(first, "temp_bytes", "temp bytes", default=0)
                    ),
                    "temp_files": _safe_int(
                        pick(first, "temp_files", "temp files", default=0)
                    ),
                }
            ]

        if not items:
            # Unrecognised heading, or every row was filtered out.
            continue

        if key == "high_elapsed_per_exec":
            # Planning-time data never replaces an entry that already exists.
            report.setdefault(key, items)
        else:
            report[key] = items
        matched = True

    return report if matched else None
def _parse_csv_structured(content: str) -> dict[str, Any] | None:
    """Parse a pg_stat_statements CSV export into a structured dict.

    Header names are lower-cased and stripped. Each data row becomes an
    entry with ``queryid``, ``query_text``, ``total_exec_sec``, ``calls``,
    ``mean_exec_sec``, ``shared_blks_hit`` and ``shared_blks_read``.

    Result keys:
      * ``top_queries`` - all entries, sorted by total time, top 20.
      * ``high_execution_count`` - entries with more than 1000 calls.
      * ``high_elapsed_per_exec`` - entries whose mean time exceeds 1 second.
      * ``temp_file_usage`` - entries with temp blocks, sized assuming
        8 KB blocks.

    Time units: ``total_exec_time`` / ``total_time`` and ``mean_exec_time`` /
    ``mean_time`` are the pg_stat_statements column names, which are
    documented as milliseconds, so they are always divided by 1000. The
    ``total_elapsed`` / ``mean_elapsed`` aliases have no documented unit and
    keep the previous heuristic (values above 1000 are treated as
    milliseconds).

    Returns ``None`` when the CSV holds no data rows.
    """
    # A leading byte-order mark would otherwise become part of the first
    # header name and hide that column from every lookup below.
    reader = csv.DictReader(io.StringIO((content or "").lstrip("\ufeff")))

    rows: list[dict[str, str]] = []
    for raw in reader:
        rows.append(
            {
                # DictReader stores surplus fields under the key None and
                # fills missing fields with None; drop the former and blank
                # the latter so that header normalisation cannot crash.
                key.lower().strip(): ("" if value is None else value)
                for key, value in raw.items()
                if key is not None
            }
        )
    if not rows:
        return None

    def to_seconds(row: dict[str, str], ms_columns: tuple[str, ...], alias: str) -> Any:
        """Return the time found under ``ms_columns`` or ``alias`` in seconds."""
        for column in ms_columns:
            if column in row:
                return _safe_float(row[column]) / 1000
        value = _safe_float(row.get(alias, 0))
        # Unit unknown for the alias column: keep the legacy guess.
        return value / 1000 if value > 1000 else value

    sections: dict[str, Any] = {"db_type": DB_TYPE_POSTGRESQL}

    top_queries: list[dict[str, Any]] = []
    high_exec: list[dict[str, Any]] = []
    high_elapsed: list[dict[str, Any]] = []
    temp_usage: list[dict[str, Any]] = []

    for r in rows:
        calls = _safe_int(r.get("calls", r.get("executions", 0)))
        total_time_sec = to_seconds(
            r, ("total_exec_time", "total_time"), "total_elapsed"
        )
        mean_time_sec = to_seconds(r, ("mean_exec_time", "mean_time"), "mean_elapsed")
        temp_blks = _safe_int(r.get("temp_blks_written", r.get("temp_blks_read", 0)))

        entry = {
            "queryid": r.get("queryid", r.get("query_id", "")),
            "query_text": r.get("query", r.get("query_text", "")),
            "total_exec_sec": total_time_sec,
            "calls": calls,
            "mean_exec_sec": mean_time_sec,
            "shared_blks_hit": _safe_int(r.get("shared_blks_hit", 0)),
            "shared_blks_read": _safe_int(r.get("shared_blks_read", 0)),
        }
        top_queries.append(entry)

        if calls > 1000:
            high_exec.append(entry)
        if mean_time_sec > 1:
            high_elapsed.append({**entry, "avg_elapsed_sec": mean_time_sec})
        if temp_blks > 0:
            # 8 KB blocks converted to MB.
            temp_usage.append({**entry, "temp_mb": temp_blks * 8 / 1024})

    # Largest first in every list.
    top_queries.sort(key=lambda x: x["total_exec_sec"], reverse=True)
    high_exec.sort(key=lambda x: x["calls"], reverse=True)
    high_elapsed.sort(key=lambda x: x["avg_elapsed_sec"], reverse=True)
    temp_usage.sort(key=lambda x: x["temp_mb"], reverse=True)

    sections["top_queries"] = top_queries[:20]
    if high_exec:
        sections["high_execution_count"] = high_exec[:20]
    if high_elapsed:
        sections["high_elapsed_per_exec"] = high_elapsed[:20]
    if temp_usage:
        sections["temp_file_usage"] = temp_usage[:20]

    return sections
def _parse_awr_html_structured(html: str) -> dict[str, Any] | None:
    """Parse an AWR HTML report into a structured dict for Oracle analysis.

    Every table extracted from ``html`` is classified by its section heading
    and mapped onto one of the result keys: ``top_elapsed_sql``,
    ``top_cpu_sql``, ``wait_events``, ``system_stats`` or ``sga_info``.
    SQL and wait-event sections use at most 20 rows, system statistics 30
    and SGA 10. Column values are looked up under several alternative
    header spellings.

    Returns ``None`` when no tables are found or no table matches a known
    section; otherwise the dict, which always carries ``db_type``.
    """
    extracted = _extract_html_tables(html)
    if not extracted:
        return None

    def pick(row: dict[str, str], *names: str, default: Any) -> Any:
        """Return the value of the first key of ``names`` present in ``row``."""
        for name in names:
            if name in row:
                return row[name]
        return default

    # Lookups shared by both SQL sections.
    def sql_id(row: dict[str, str]) -> Any:
        return pick(row, "sql id", "sql_id", default="")

    def sql_text(row: dict[str, str]) -> Any:
        return pick(row, "sql text", "sql_text", "sql module", default="")

    def executions(row: dict[str, str]) -> Any:
        return _safe_int(pick(row, "executions", "execs", default=0))

    def buffer_gets(row: dict[str, str]) -> Any:
        return _safe_int(pick(row, "buffer gets", "buffer_gets", "gets", default=0))

    report: dict[str, Any] = {"db_type": DB_TYPE_ORACLE}
    matched = False

    for heading, rows in extracted:
        if not rows:
            continue

        key = ""
        items: list[dict[str, Any]] = []

        if _match_heading(heading, "sql", "elapsed"):
            # Top SQL by elapsed time
            key = "top_elapsed_sql"
            items = [
                {
                    "sql_id": sql_id(r),
                    "sql_text": sql_text(r),
                    "elapsed_sec": _safe_float(
                        pick(
                            r,
                            "elapsed time (s)",
                            "elapsed",
                            "elapsed_sec",
                            default=0,
                        )
                    ),
                    "executions": executions(r),
                    "buffer_gets": buffer_gets(r),
                }
                for r in rows[:20]
            ]

        elif _match_heading(heading, "sql", "cpu"):
            # Top SQL by CPU
            key = "top_cpu_sql"
            items = [
                {
                    "sql_id": sql_id(r),
                    "sql_text": sql_text(r),
                    "cpu_sec": _safe_float(
                        pick(r, "cpu time (s)", "cpu", "cpu_sec", default=0)
                    ),
                    "executions": executions(r),
                    "buffer_gets": buffer_gets(r),
                }
                for r in rows[:20]
            ]

        elif _match_heading(heading, "wait", "event"):
            # Wait events
            key = "wait_events"
            items = [
                {
                    "event": pick(r, "event", "event name", "wait event", default=""),
                    "total_waits": _safe_int(
                        pick(r, "waits", "total waits", default=0)
                    ),
                    "time_waited_sec": _safe_float(
                        pick(
                            r,
                            "time (s)",
                            "total wait time (s)",
                            "time waited",
                            default=0,
                        )
                    ),
                }
                for r in rows[:20]
            ]

        elif _match_heading(heading, "system") or _match_heading(
            heading, "load profile"
        ):
            # System stats / load profile: rows without a name are skipped.
            key = "system_stats"
            for r in rows[:30]:
                stat_name = pick(r, "statistic name", "statistic", "name", default="")
                if not stat_name:
                    continue
                stat_value = pick(r, "value", "total", "per second", default="")
                items.append({"name": stat_name, "value": _safe_int(stat_value)})

        elif _match_heading(heading, "sga"):
            # SGA components: rows without a name are skipped.
            key = "sga_info"
            for r in rows[:10]:
                pool_name = pick(r, "pool", "name", "component", default="")
                if not pool_name:
                    continue
                pool_size = pick(r, "size", "size (mb)", "bytes", default="")
                items.append({"name": pool_name, "size_mb": _safe_float(pool_size)})

        if not items:
            # Unrecognised heading, or every row was filtered out.
            continue

        report[key] = items
        matched = True

    return report if matched else None