From e98833868dd5ca300158a1469fbd37e954eaa487 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 4 Apr 2026 14:43:06 +0000
Subject: [PATCH 01/19] Add AI-powered PostgreSQL assistant CLI tool

- app.py: Main CLI loop with rich terminal output, argument parsing
- llm_client.py: Ollama API client for LLM communication
- mcp_client.py: MCP PostgreSQL server client for query execution
- sql_generator.py: Prompt engineering, SQL extraction, and safety validation
- requirements.txt: Python dependencies (requests, rich)
- README.md: Architecture docs, usage examples, installation instructions

Features:
- Natural language to SQL via Ollama (codellama model)
- Schema-aware prompt engineering
- SQL safety enforcement (SELECT-only, blocks dangerous keywords)
- Retry logic for failed SQL generation
- Rich formatted output with timing metrics
- Interactive CLI commands (help, schema, clear, exit)
---
 tools/pg-assistant/README.md        | 117 ++++++++++
 tools/pg-assistant/app.py           | 326 ++++++++++++++++++++++++++++
 tools/pg-assistant/llm_client.py    | 115 ++++++++++
 tools/pg-assistant/mcp_client.py    | 174 +++++++++++++++
 tools/pg-assistant/requirements.txt |   2 +
 tools/pg-assistant/sql_generator.py | 235 ++++++++++++++++++++
 6 files changed, 969 insertions(+)
 create mode 100644 tools/pg-assistant/README.md
 create mode 100644 tools/pg-assistant/app.py
 create mode 100644 tools/pg-assistant/llm_client.py
 create mode 100644 tools/pg-assistant/mcp_client.py
 create mode 100644 tools/pg-assistant/requirements.txt
 create mode 100644 tools/pg-assistant/sql_generator.py

diff --git a/tools/pg-assistant/README.md b/tools/pg-assistant/README.md
new file mode 100644
index 0000000..dda1004
--- /dev/null
+++ b/tools/pg-assistant/README.md
@@ -0,0 +1,117 @@
+# pg-assistant — AI-Powered PostgreSQL CLI
+
+A production-ready Python CLI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them against PostgreSQL via an MCP server.
+
+## Architecture
+
+```
+User Question (natural language)
+        │
+        ▼
+┌──────────────────┐
+│  sql_generator   │  ← Prompt engineering + safety validation
+│                  │
+│  ┌────────────┐  │
+│  │ llm_client │──┼──→ Ollama API (codellama)
+│  └────────────┘  │
+└────────┬─────────┘
+         │ validated SELECT query
+         ▼
+┌──────────────────┐
+│   mcp_client     │──→ MCP PostgreSQL Server
+└──────────────────┘
+         │
+         ▼
+   Formatted Results (rich tables)
+```
+
+| Module            | Responsibility                                  |
+|-------------------|--------------------------------------------------|
+| `app.py`          | CLI loop, argument parsing, rich output          |
+| `llm_client.py`   | Ollama API communication                        |
+| `mcp_client.py`   | MCP PostgreSQL server communication             |
+| `sql_generator.py` | Prompt engineering, SQL extraction, safety checks |
+
+## Prerequisites
+
+- **Python 3.10+**
+- **Ollama** running locally with the `codellama` model pulled:
+  ```bash
+  ollama serve &
+  ollama pull codellama
+  ```
+- **MCP PostgreSQL server** running on `http://localhost:3000`
+- **PostgreSQL** with `pg_stat_statements` enabled
+
+## Installation
+
+```bash
+cd tools/pg-assistant
+pip install -r requirements.txt
+```
+
+## Usage
+
+```bash
+# Basic usage (defaults: Ollama on :11434, MCP on :3000)
+python app.py
+
+# Custom endpoints
+python app.py --ollama-url http://localhost:11434 --mcp-url http://localhost:3000
+
+# Use a different model
+python app.py --model mistral
+
+# Verbose/debug logging
+python app.py -v
+
+# Specify a PostgreSQL schema
+python app.py --schema my_schema
+```
+
+### CLI Commands
+
+| Command    | Description                          |
+|------------|--------------------------------------|
+| `help`     | Show available commands and examples |
+| `schema`   | Refresh and display database schema  |
+| `clear`    | Clear the terminal screen            |
+| `exit` / `quit` | Quit the application           |
+
+### Example Session
+
+```
+pg-assistant> Show me the top 5 largest tables
+
+┌─────────────────────────────────────────────────┐
+│ Generated SQL                                   │
+├─────────────────────────────────────────────────┤
+│ SELECT schemaname, relname, n_live_tup           │
+│ FROM pg_stat_user_tables                         │
+│ ORDER BY n_live_tup DESC                         │
+│ LIMIT 5;                                         │
+└─────────────────────────────────────────────────┘
+
+┌─────────────┬──────────┬────────────┐
+│ schemaname  │ relname  │ n_live_tup │
+├─────────────┼──────────┼────────────┤
+│ public      │ orders   │ 1000000    │
+│ public      │ users    │ 500000     │
+│ ...         │ ...      │ ...        │
+└─────────────┴──────────┴────────────┘
+5 row(s) returned in 42ms
+```
+
+## SQL Safety
+
+The assistant enforces **read-only access** by:
+
+1. Blocking dangerous keywords: `DROP`, `DELETE`, `TRUNCATE`, `UPDATE`, `INSERT`, `ALTER`, `CREATE`, `GRANT`, `REVOKE`, `EXEC`, `EXECUTE`
+2. Requiring queries to start with `SELECT` or `WITH` (CTEs)
+3. Stripping string literals before keyword scanning to avoid false positives
+
+## Schema Awareness
+
+On startup, the assistant fetches `information_schema` metadata for the base tables of the selected schema and injects it into every LLM prompt. This provides the model with table names, column names, data types, nullability, and column defaults — significantly improving SQL generation accuracy.
+
+Refresh the schema at any time with the `schema` command.
diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
new file mode 100644
index 0000000..d1437f6
--- /dev/null
+++ b/tools/pg-assistant/app.py
@@ -0,0 +1,326 @@
+#!/usr/bin/env python3
+"""AI-powered PostgreSQL assistant CLI application.
+
+Converts natural language questions into SQL queries using a local LLM (Ollama)
+and executes them against a PostgreSQL database via an MCP server.
+"""
+
+import argparse
+import logging
+import sys
+import time
+
+from rich.console import Console
+from rich.logging import RichHandler
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+
+from llm_client import LLMClient
+from mcp_client import MCPClient
+from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError
+
+console = Console()
+
+HELP_TEXT = """
+[bold cyan]Available Commands:[/bold cyan]
+
+  [green]exit[/green] / [green]quit[/green]       Quit the application
+  [green]help[/green]              Show this help message
+  [green]schema[/green]            Refresh and display the database schema
+  [green]clear[/green]             Clear the terminal screen
+
+[bold cyan]Example Questions:[/bold cyan]
+
+  • Show me all tables in the database
+  • What are the top 10 largest tables by row count?
+  • List all active connections to the database
+  • Show the slowest queries from pg_stat_statements
+  • What indexes exist on the users table?
+  • Show me the table structure for the orders table
+"""
+
+BANNER = r"""
+[bold cyan]╔══════════════════════════════════════════════════╗
+║        AI PostgreSQL Assistant (pg-assistant)     ║
+║  Natural Language → SQL via Ollama + MCP Server   ║
+╚══════════════════════════════════════════════════╝[/bold cyan]
+"""
+
+
+def setup_logging(verbose: bool = False) -> None:
+    """Configure root logging via RichHandler: DEBUG if verbose, otherwise INFO."""
+    level = logging.DEBUG if verbose else logging.INFO
+    logging.basicConfig(
+        level=level,
+        format="%(message)s",
+        datefmt="[%X]",
+        handlers=[RichHandler(console=console, rich_tracebacks=True, show_path=False)],
+    )
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse CLI options: service URLs, model name, schema, and verbosity."""
+    parser = argparse.ArgumentParser(
+        description="AI-powered PostgreSQL assistant using Ollama and MCP",
+    )
+    parser.add_argument(
+        "--ollama-url",
+        default="http://localhost:11434",
+        help="Ollama server URL (default: http://localhost:11434)",
+    )
+    parser.add_argument(
+        "--mcp-url",
+        default="http://localhost:3000",
+        help="MCP PostgreSQL server URL (default: http://localhost:3000)",
+    )
+    parser.add_argument(
+        "--model",
+        default="codellama",
+        help="Ollama model name (default: codellama)",
+    )
+    parser.add_argument(
+        "--schema",
+        default="public",
+        help="PostgreSQL schema to use for context (default: public)",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose/debug logging",
+    )
+    return parser.parse_args()
+
+
+def check_services(llm_client: LLMClient, mcp_client: MCPClient) -> bool:
+    """Print reachability of Ollama and MCP; return True only if both respond."""
+    all_ok = True
+
+    with console.status("[bold yellow]Checking Ollama server..."):
+        if llm_client.health_check():
+            console.print("  [green]✓[/green] Ollama server is reachable")
+            models = llm_client.list_models()  # None or empty: no model line printed
+            if models:
+                model_names = [m.get("name", "unknown") for m in models]
+                console.print(f"    Available models: {', '.join(model_names)}")
+        else:
+            console.print(
+                f"  [red]✗[/red] Cannot reach Ollama at {llm_client.base_url}"
+            )
+            all_ok = False
+
+    with console.status("[bold yellow]Checking MCP server..."):
+        if mcp_client.health_check():
+            console.print("  [green]✓[/green] MCP PostgreSQL server is reachable")
+        else:
+            console.print(
+                f"  [red]✗[/red] Cannot reach MCP server at {mcp_client.base_url}"
+            )
+            all_ok = False
+
+    return all_ok
+
+
+def load_schema(
+    mcp_client: MCPClient,
+    sql_generator: SQLGenerator,
+    schema_name: str,
+) -> None:
+    """Fetch schema metadata via MCP, store it on the generator, and display it."""
+    with console.status("[bold yellow]Loading database schema..."):
+        schema = mcp_client.get_schema(schema_name)
+
+    if schema:  # None and an empty dict both take the warning branch below
+        sql_generator.update_schema(schema)
+        display_schema(schema)
+    else:
+        console.print(
+            "[yellow]⚠ Could not load schema metadata. "
+            "SQL generation will proceed without schema context.[/yellow]"
+        )
+
+
+def display_schema(schema: dict) -> None:
+    """Render the schema as a rich table, one section per database table."""
+    table = Table(
+        title="Database Schema",
+        show_header=True,
+        header_style="bold magenta",
+    )
+    table.add_column("Table", style="cyan", no_wrap=True)
+    table.add_column("Column", style="green")
+    table.add_column("Type", style="yellow")
+    table.add_column("Nullable", style="dim")
+
+    for table_name, columns in schema.items():
+        for i, col in enumerate(columns):
+            table.add_row(
+                table_name if i == 0 else "",  # name only on a table's first row
+                col["column_name"],
+                col["data_type"],
+                col["is_nullable"],
+            )
+        table.add_section()
+
+    console.print(table)
+
+
+def display_results(result: dict) -> None:
+    """Render query results as a rich table, or print the error / empty notice."""
+    if "error" in result:
+        console.print(f"\n[red]Query Error:[/red] {result['error']}")
+        return
+
+    columns = result.get("columns", [])
+    rows = result.get("rows", [])
+    row_count = result.get("row_count", len(rows))
+    elapsed_ms = result.get("elapsed_ms", 0)
+
+    if not rows:
+        console.print("\n[yellow]Query returned no results.[/yellow]")
+        return
+
+    table = Table(
+        title="Query Results",
+        show_header=True,
+        header_style="bold magenta",
+        show_lines=True,
+    )
+
+    # Header source: explicit columns, else first row's keys, else positional names
+    if columns:
+        col_names = columns
+    elif isinstance(rows[0], dict):
+        col_names = list(rows[0])
+    else:
+        col_names = [f"col_{i}" for i in range(len(rows[0]))]
+
+    for col_name in col_names:
+        table.add_column(str(col_name), style="cyan", overflow="fold")
+
+    for row in rows:
+        if isinstance(row, dict):  # align by header name, not by key order
+            row = [row.get(name) for name in col_names]
+        if isinstance(row, (list, tuple)):
+            table.add_row(*["NULL" if v is None else str(v) for v in row])
+
+    console.print(table)
+    console.print(f"\n[dim]{row_count} row(s) returned in {elapsed_ms}ms[/dim]")
+
+
+def process_query(
+    user_input: str,
+    sql_generator: SQLGenerator,
+    mcp_client: MCPClient,
+) -> None:
+    """Generate SQL for a question, show it, run it via MCP, and render the result."""
+    # Step 1: Generate SQL
+    console.print()
+    with console.status("[bold yellow]Generating SQL..."):
+        start_gen = time.monotonic()
+        try:
+            sql = sql_generator.generate_sql(user_input)
+        except UnsafeSQLError as exc:  # reported and dropped; nothing is executed
+            console.print(f"\n[red]Safety Block:[/red] {exc}")
+            return
+        except SQLGenerationError as exc:
+            console.print(f"\n[red]Generation Error:[/red] {exc}")
+            return
+        gen_elapsed = time.monotonic() - start_gen
+
+    # Step 2: Display generated SQL
+    console.print(
+        Panel(
+            Text(sql, style="green"),
+            title="[bold]Generated SQL[/bold]",
+            subtitle=f"[dim]generated in {gen_elapsed:.2f}s[/dim]",
+            border_style="blue",
+        )
+    )
+
+    # Step 3: Execute SQL
+    with console.status("[bold yellow]Executing query..."):
+        start_exec = time.monotonic()
+        try:
+            result = mcp_client.execute_query(sql)
+        except (ConnectionError, RuntimeError) as exc:
+            console.print(f"\n[red]Execution Error:[/red] {exc}")
+            return
+        exec_elapsed = time.monotonic() - start_exec
+
+    # Step 4: Display results
+    if "elapsed_ms" not in result:  # fall back to the wall-clock time measured here
+        result["elapsed_ms"] = round(exec_elapsed * 1000, 2)
+
+    display_results(result)
+
+
+def main() -> None:
+    """Run the CLI: parse args, probe services, load the schema, then loop on input."""
+    args = parse_args()
+    setup_logging(verbose=args.verbose)
+
+    console.print(BANNER)
+
+    # Initialize clients
+    llm_client = LLMClient(
+        base_url=args.ollama_url,
+        model=args.model,
+    )
+    mcp_client = MCPClient(base_url=args.mcp_url)
+    sql_generator = SQLGenerator(llm_client=llm_client)
+
+    # Check service connectivity
+    if not check_services(llm_client, mcp_client):
+        console.print(
+            "\n[bold red]Some services are not available. "
+            "Please ensure Ollama and MCP server are running.[/bold red]"
+        )
+        console.print(
+            "[dim]Continuing anyway — errors will appear when you submit queries.[/dim]"
+        )
+
+    # Load schema
+    load_schema(mcp_client, sql_generator, args.schema)
+
+    console.print(
+        '\n[dim]Type a natural language question, or "help" for commands.[/dim]\n'
+    )
+
+    # Main REPL loop
+    while True:
+        try:
+            user_input = console.input(
+                "[bold green]pg-assistant>[/bold green] "
+            ).strip()
+        except (KeyboardInterrupt, EOFError):  # Ctrl-C / Ctrl-D at the prompt quits
+            console.print("\n[dim]Goodbye![/dim]")
+            sys.exit(0)
+
+        if not user_input:
+            continue
+
+        command = user_input.lower()  # built-in commands match case-insensitively
+
+        if command in ("exit", "quit"):
+            console.print("[dim]Goodbye![/dim]")
+            sys.exit(0)
+
+        if command == "help":
+            console.print(HELP_TEXT)
+            continue
+
+        if command == "schema":
+            load_schema(mcp_client, sql_generator, args.schema)
+            continue
+
+        if command == "clear":
+            console.clear()
+            continue
+
+        process_query(user_input, sql_generator, mcp_client)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/pg-assistant/llm_client.py b/tools/pg-assistant/llm_client.py
new file mode 100644
index 0000000..bb34846
--- /dev/null
+++ b/tools/pg-assistant/llm_client.py
@@ -0,0 +1,115 @@
+"""LLM client module for communicating with Ollama API."""
+
+import logging
+import time
+from typing import Optional
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_OLLAMA_URL = "http://localhost:11434"
+DEFAULT_MODEL = "codellama"
+DEFAULT_TIMEOUT = 120
+
+
+class LLMClient:
+    """Client for the Ollama HTTP API (text generation, health check, model list)."""
+
+    def __init__(
+        self,
+        base_url: str = DEFAULT_OLLAMA_URL,
+        model: str = DEFAULT_MODEL,
+        timeout: int = DEFAULT_TIMEOUT,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.model = model
+        self.timeout = timeout
+        self.generate_url = f"{self.base_url}/api/generate"
+
+    def generate(self, prompt: str, system_prompt: str = "") -> str:
+        """Send a prompt to Ollama and return the generated text.
+
+        Args:
+            prompt: The user prompt to send.
+            system_prompt: Optional system-level instruction.
+
+        Returns:
+            The stripped response text; empty if the model produced none.
+
+        Raises:
+            ConnectionError: If the Ollama server is unreachable.
+            RuntimeError: On timeout, request failure, bad status, or malformed JSON.
+        """
+        payload: dict = {"model": self.model, "prompt": prompt, "stream": False}
+        if system_prompt:
+            payload["system"] = system_prompt
+
+        logger.debug("Sending request to Ollama: model=%s", self.model)
+        start = time.monotonic()
+
+        try:
+            response = requests.post(
+                self.generate_url,
+                json=payload,
+                timeout=self.timeout,
+            )
+        except requests.ConnectionError as exc:
+            logger.error("Cannot reach Ollama at %s", self.base_url)
+            raise ConnectionError(
+                f"Cannot connect to Ollama at {self.base_url}. "
+                "Ensure Ollama is running (ollama serve)."
+            ) from exc
+        except requests.Timeout as exc:
+            logger.error("Ollama request timed out after %ds", self.timeout)
+            raise RuntimeError(
+                f"Ollama request timed out after {self.timeout}s."
+            ) from exc
+        except requests.RequestException as exc:  # e.g. invalid URL, broken stream
+            raise RuntimeError(f"Ollama request failed: {exc}") from exc
+
+        elapsed = time.monotonic() - start
+        logger.debug("Ollama responded in %.2fs", elapsed)
+
+        if response.status_code != 200:
+            error_detail = response.text[:500]
+            logger.error("Ollama API error %d: %s", response.status_code, error_detail)
+            raise RuntimeError(
+                f"Ollama API returned status {response.status_code}: {error_detail}"
+            )
+
+        try:  # keep decode failures inside the documented RuntimeError contract
+            generated_text = str(response.json().get("response") or "").strip()
+        except (ValueError, AttributeError) as exc:
+            raise RuntimeError("Ollama returned a malformed JSON response.") from exc
+
+        if not generated_text:
+            logger.warning("Ollama returned an empty response")
+
+        return generated_text
+
+    def health_check(self) -> bool:
+        """Check whether the Ollama server is reachable.
+
+        Returns:
+            True if the server answers 200; False on any other status or request error.
+        """
+        try:
+            response = requests.get(f"{self.base_url}/", timeout=5)
+            return response.status_code == 200
+        except requests.RequestException:
+            return False
+
+    def list_models(self) -> Optional[list]:
+        """List available models on the Ollama server.
+
+        Returns:
+            A list of model info dicts, or None on a bad status, request or JSON error.
+        """
+        try:
+            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
+            if response.status_code == 200:
+                return response.json().get("models", [])
+        except (requests.RequestException, ValueError, AttributeError):
+            logger.debug("Could not list Ollama models", exc_info=True)
+        return None
diff --git a/tools/pg-assistant/mcp_client.py b/tools/pg-assistant/mcp_client.py
new file mode 100644
index 0000000..e4bc230
--- /dev/null
+++ b/tools/pg-assistant/mcp_client.py
@@ -0,0 +1,174 @@
+"""MCP (Model Context Protocol) client for PostgreSQL server communication."""
+
+import logging
+import time
+from typing import Any, Optional
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_MCP_URL = "http://localhost:3000"
+DEFAULT_TIMEOUT = 30
+
+
+class MCPClient:
+    """Client for running SQL and reading schema metadata through the MCP server."""
+
+    def __init__(
+        self,
+        base_url: str = DEFAULT_MCP_URL,
+        timeout: int = DEFAULT_TIMEOUT,
+    ) -> None:
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+
+    def execute_query(self, sql: str) -> dict[str, Any]:
+        """Execute a SQL query via the MCP server.
+
+        Args:
+            sql: The SQL query string to execute.
+
+        Returns:
+            A dict with 'columns', 'rows', 'row_count', 'elapsed_ms'; or only 'error'.
+
+        Raises:
+            ConnectionError: If the MCP server is unreachable.
+            RuntimeError: On timeout, request failure, bad status, or malformed JSON.
+        """
+        payload = {"method": "query", "params": {"sql": sql}}
+
+        logger.debug("Sending query to MCP server: %s", sql[:200])
+        start = time.monotonic()
+
+        try:
+            response = requests.post(
+                self.base_url,
+                json=payload,
+                timeout=self.timeout,
+            )
+        except requests.ConnectionError as exc:
+            logger.error("Cannot reach MCP server at %s", self.base_url)
+            raise ConnectionError(
+                f"Cannot connect to MCP server at {self.base_url}. "
+                "Ensure the MCP PostgreSQL server is running."
+            ) from exc
+        except requests.Timeout as exc:
+            logger.error("MCP request timed out after %ds", self.timeout)
+            raise RuntimeError(
+                f"MCP server request timed out after {self.timeout}s."
+            ) from exc
+        except requests.RequestException as exc:  # e.g. invalid URL, broken stream
+            raise RuntimeError(f"MCP request failed: {exc}") from exc
+
+        elapsed = time.monotonic() - start
+        logger.debug("MCP server responded in %.2fs", elapsed)
+
+        if response.status_code != 200:
+            error_detail = response.text[:500]
+            logger.error("MCP server error %d: %s", response.status_code, error_detail)
+            raise RuntimeError(
+                f"MCP server returned status {response.status_code}: {error_detail}"
+            )
+
+        try:  # keep decode failures inside the documented RuntimeError contract
+            data = response.json()
+            if not isinstance(data, dict):
+                raise ValueError("top-level JSON value is not an object")
+        except ValueError as exc:
+            raise RuntimeError(f"MCP server sent a malformed response: {exc}") from exc
+
+        if "error" in data:
+            logger.warning("MCP query error: %s", data["error"])
+            return {"error": str(data["error"])}
+
+        return {
+            "columns": data.get("columns", []),
+            "rows": data.get("rows", []),
+            "row_count": data.get("rowCount", len(data.get("rows", []))),
+            "elapsed_ms": round(elapsed * 1000, 2),
+        }
+
+    def get_schema(self, schema_name: str = "public") -> Optional[dict[str, Any]]:
+        """Retrieve column metadata for the base tables of one schema via MCP.
+
+        Args:
+            schema_name: The PostgreSQL schema to inspect.
+
+        Returns:
+            Dict mapping table name to column-info dicts, or None on failure.
+        """
+        schema_literal = schema_name.replace("'", "''")  # escape ' for the literal
+        sql = f"""
+            SELECT
+                t.table_name,
+                c.column_name,
+                c.data_type,
+                c.is_nullable,
+                c.column_default
+            FROM information_schema.tables t
+            JOIN information_schema.columns c
+                ON t.table_name = c.table_name
+                AND t.table_schema = c.table_schema
+            WHERE t.table_schema = '{schema_literal}'
+                AND t.table_type = 'BASE TABLE'
+            ORDER BY t.table_name, c.ordinal_position;
+        """
+        try:
+            result = self.execute_query(sql)
+            if "error" in result:
+                logger.warning("Failed to fetch schema: %s", result["error"])
+                return None
+            return self._parse_schema(result)
+        except (ConnectionError, RuntimeError) as exc:
+            logger.warning("Failed to fetch schema: %s", exc)
+            return None
+
+    def health_check(self) -> bool:
+        """Check whether the MCP server is reachable.
+
+        Returns:
+            True for any status below 500; False on 5xx or any request error.
+        """
+        try:
+            response = requests.get(self.base_url, timeout=5)
+            return response.status_code < 500
+        except requests.RequestException:
+            return False
+
+    @staticmethod
+    def _parse_schema(result: dict[str, Any]) -> dict[str, list[dict[str, str]]]:
+        """Group schema query rows by table; accepts dict rows or 5-item sequences.
+
+        Args:
+            result: The raw query result from execute_query.
+
+        Returns:
+            A dict mapping table names to lists of column info dicts.
+        """
+        schema: dict[str, list[dict[str, str]]] = {}
+        rows = result.get("rows", [])
+
+        for row in rows:
+            if isinstance(row, dict):
+                table = row.get("table_name", "")
+                col_info = {
+                    "column_name": row.get("column_name", ""),
+                    "data_type": row.get("data_type", ""),
+                    "is_nullable": row.get("is_nullable", ""),
+                    "column_default": str(row.get("column_default") or ""),
+                }
+            elif isinstance(row, (list, tuple)) and len(row) >= 5:  # 5 cells needed
+                table = str(row[0])
+                col_info = {
+                    "column_name": str(row[1]),
+                    "data_type": str(row[2]),
+                    "is_nullable": str(row[3]),
+                    "column_default": "" if row[4] is None else str(row[4]),
+                }
+            else:
+                continue
+
+            schema.setdefault(table, []).append(col_info)
+
+        return schema
diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt
new file mode 100644
index 0000000..8f33262
--- /dev/null
+++ b/tools/pg-assistant/requirements.txt
@@ -0,0 +1,2 @@
+requests>=2.31.0,<3.0.0
+rich>=13.7.0,<15.0.0
diff --git a/tools/pg-assistant/sql_generator.py b/tools/pg-assistant/sql_generator.py
new file mode 100644
index 0000000..42dd313
--- /dev/null
+++ b/tools/pg-assistant/sql_generator.py
@@ -0,0 +1,235 @@
+"""SQL generation module with prompt engineering and safety validation."""
+
+import logging
+import re
+from typing import Any, Optional
+
+from llm_client import LLMClient
+
+logger = logging.getLogger(__name__)
+
+SYSTEM_PROMPT = (
+    "You are a PostgreSQL expert. You receive natural language questions about "
+    "a PostgreSQL database and return ONLY valid SQL SELECT queries. "
+    "Rules:\n"
+    "- Return ONLY the SQL query, nothing else.\n"
+    "- Do NOT include explanations, comments, or markdown formatting.\n"
+    "- Do NOT use DROP, DELETE, TRUNCATE, UPDATE, INSERT, ALTER, CREATE, or GRANT.\n"
+    "- Only generate SELECT statements.\n"
+    "- Always terminate the query with a semicolon.\n"
+    "- If the question cannot be answered with a SELECT query, respond with: "
+    "-- CANNOT_GENERATE"
+)
+
+DANGEROUS_KEYWORDS = frozenset(
+    {
+        "DROP",
+        "DELETE",
+        "TRUNCATE",
+        "UPDATE",
+        "INSERT",
+        "ALTER",
+        "CREATE",
+        "GRANT",
+        "REVOKE",
+        "EXEC",
+        "EXECUTE",
+    }
+)
+
+MAX_RETRIES = 2
+
+
+class SQLGenerationError(Exception):
+    """Raised when the LLM call fails or no attempt yields a valid query."""
+
+
+class UnsafeSQLError(Exception):
+    """Raised when generated SQL contains a keyword listed in DANGEROUS_KEYWORDS."""
+
+
+class SQLGenerator:
+    """Turns natural-language questions into validated, read-only SQL via an LLM."""
+
+    def __init__(
+        self,
+        llm_client: LLMClient,
+        schema_metadata: Optional[dict[str, Any]] = None,
+    ) -> None:
+        self.llm_client = llm_client
+        self.schema_metadata = schema_metadata
+
+    def update_schema(self, schema_metadata: dict[str, Any]) -> None:
+        """Update the schema metadata used for prompt context.
+
+        Args:
+            schema_metadata: Dict mapping table names to column info lists.
+        """
+        self.schema_metadata = schema_metadata
+        logger.info("Schema metadata updated: %d tables", len(schema_metadata))
+
+    def generate_sql(self, user_query: str) -> str:
+        """Generate a SQL query from a natural language question.
+
+        Makes at most MAX_RETRIES attempts in total, feeding the last error back.
+
+        Args:
+            user_query: The natural language question.
+
+        Returns:
+            A validated SQL SELECT query string.
+
+        Raises:
+            SQLGenerationError: If the LLM call fails or no attempt yields valid SQL.
+            UnsafeSQLError: If the generated SQL is unsafe (raised at once, no retry).
+        """
+        prompt = self._build_prompt(user_query)
+
+        last_error: Optional[str] = None
+        for attempt in range(1, MAX_RETRIES + 1):
+            logger.info("SQL generation attempt %d/%d", attempt, MAX_RETRIES)
+
+            retry_prompt = prompt
+            if last_error and attempt > 1:
+                retry_prompt += (
+                    f"\n\nPrevious attempt failed with error: {last_error}\n"
+                    "Please generate a corrected SQL query."
+                )
+
+            try:
+                raw_response = self.llm_client.generate(
+                    prompt=retry_prompt,
+                    system_prompt=SYSTEM_PROMPT,
+                )
+            except (ConnectionError, RuntimeError) as exc:
+                logger.error("LLM request failed: %s", exc)
+                raise SQLGenerationError(
+                    f"Failed to communicate with LLM: {exc}"
+                ) from exc
+
+            sql = self._extract_sql(raw_response)
+
+            if not sql or sql == "-- CANNOT_GENERATE":
+                last_error = "LLM could not generate a valid query"
+                logger.warning("Attempt %d: %s", attempt, last_error)
+                continue
+
+            try:
+                self._validate_sql(sql)
+                return sql
+            except UnsafeSQLError:
+                raise
+            except ValueError as exc:
+                last_error = str(exc)
+                logger.warning("Attempt %d validation failed: %s", attempt, last_error)
+                continue
+
+        raise SQLGenerationError(
+            f"Failed to generate valid SQL after {MAX_RETRIES} attempts. "
+            f"Last error: {last_error}"
+        )
+
+    def _build_prompt(self, user_query: str) -> str:
+        """Build the full prompt including schema context.
+
+        Args:
+            user_query: The natural language question.
+
+        Returns:
+            The complete prompt string.
+        """
+        parts = []
+
+        if self.schema_metadata:
+            parts.append("Database schema:")
+            for table_name, columns in self.schema_metadata.items():
+                col_defs = []
+                for col in columns:
+                    nullable = "NULL" if col["is_nullable"] == "YES" else "NOT NULL"
+                    default = (
+                        f" DEFAULT {col['column_default']}"
+                        if col.get("column_default")
+                        else ""
+                    )
+                    col_defs.append(
+                        f"  {col['column_name']} {col['data_type']} {nullable}{default}"
+                    )
+                parts.append(f"TABLE {table_name} (\n" + ",\n".join(col_defs) + "\n)")
+            parts.append("")
+
+        parts.append(f"Question: {user_query}")
+        parts.append("SQL:")
+
+        return "\n".join(parts)
+
+    @staticmethod
+    def _extract_sql(raw_response: str) -> str:
+        """Extract clean SQL from the LLM response.
+
+        Unwraps the first markdown code fence and drops whole-line "--" comments.
+
+        Args:
+            raw_response: The raw LLM output.
+
+        Returns:
+            SQL ending in ";", "" if nothing remains, or "-- CANNOT_GENERATE".
+        """
+        text = raw_response.strip()
+
+        # Remove markdown code fences
+        code_block_match = re.search(
+            r"```(?:sql)?\s*\n?(.*?)\n?```", text, re.DOTALL | re.IGNORECASE
+        )
+        if code_block_match:
+            text = code_block_match.group(1).strip()
+
+        # Drop blank lines and whole-line "--" comments; honor the refusal marker
+        lines = []
+        for line in text.split("\n"):
+            stripped = line.strip()
+            if stripped and not stripped.startswith("--"):
+                lines.append(line)
+            elif stripped == "-- CANNOT_GENERATE":
+                return "-- CANNOT_GENERATE"
+
+        sql = "\n".join(lines).strip()
+
+        # Ensure trailing semicolon
+        if sql and not sql.endswith(";"):
+            sql += ";"
+
+        return sql
+
+    @staticmethod
+    def _validate_sql(sql: str) -> None:
+        """Validate that the SQL is a single, read-only SELECT statement.
+
+        Args:
+            sql: The SQL query to validate.
+
+        Raises:
+            UnsafeSQLError: If the query contains a dangerous keyword or INTO.
+            ValueError: If it is empty, multi-statement, or not SELECT/WITH.
+        """
+        if not sql:
+            raise ValueError("Empty SQL query")
+
+        # One lexing pass: a quote inside a comment must not pose as a literal.
+        sanitized = re.sub(
+            r"'(?:[^']|'')*'|--[^\n]*|/\*[\s\S]*?\*/",
+            lambda m: "''" if m.group().startswith("'") else " ",
+            sql.upper(),
+        ).strip()
+
+        for keyword in sorted(DANGEROUS_KEYWORDS | {"INTO"}):  # SELECT..INTO writes
+            if re.search(rf"\b{keyword}\b", sanitized):
+                raise UnsafeSQLError(
+                    f"Unsafe SQL detected: query contains '{keyword}'. "
+                    "Only SELECT queries are allowed."
+                )
+
+        if ";" in sanitized.rstrip("; \t\r\n"):  # a second, stacked statement
+            raise ValueError("Query must be a single statement")
+        if not sanitized.startswith(("SELECT", "WITH")):
+            head = sql[:50]
+            raise ValueError(f"Query must start with SELECT or WITH. Got: {head}...")

From 86aa9c77422b931627059d4255d02d89892137c5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 4 Apr 2026 17:06:16 +0000
Subject: [PATCH 02/19] Refactor pg-assistant: Streamlit UI, direct PostgreSQL,
 connection profiles

- Replace CLI (app.py) with Streamlit web UI
- Replace MCP client with direct PostgreSQL connection via psycopg2 (db_client.py)
- Add connection profile manager for save/load DB configs (profile_manager.py)
- Update requirements.txt with streamlit, psycopg2-binary, pandas
- Update README with new architecture and usage docs
- Keep llm_client.py and sql_generator.py unchanged
---
 tools/pg-assistant/README.md          |  95 ++--
 tools/pg-assistant/app.py             | 632 +++++++++++++-------------
 tools/pg-assistant/db_client.py       | 174 +++++++
 tools/pg-assistant/profile_manager.py | 113 +++++
 tools/pg-assistant/requirements.txt   |   4 +-
 5 files changed, 658 insertions(+), 360 deletions(-)
 create mode 100644 tools/pg-assistant/db_client.py
 create mode 100644 tools/pg-assistant/profile_manager.py

diff --git a/tools/pg-assistant/README.md b/tools/pg-assistant/README.md
index dda1004..7d240c9 100644
--- a/tools/pg-assistant/README.md
+++ b/tools/pg-assistant/README.md
@@ -1,6 +1,6 @@
-# pg-assistant — AI-Powered PostgreSQL CLI
+# pg-assistant — AI-Powered PostgreSQL Assistant
 
-A production-ready Python CLI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them against PostgreSQL via an MCP server.
+A Streamlit web UI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them directly against PostgreSQL. Includes connection profile management for saving and loading database configurations.
 
 ## Architecture
 
@@ -18,19 +18,20 @@ User Question (natural language)
          │ validated SELECT query
          ▼
 ┌──────────────────┐
-│   mcp_client     │──→ MCP PostgreSQL Server
+│    db_client     │──→ PostgreSQL (direct via psycopg2)
 └──────────────────┘
          │
          ▼
-   Formatted Results (rich tables)
+   Streamlit Web UI (tables, charts, CSV export)
 ```
 
-| Module            | Responsibility                                  |
-|-------------------|--------------------------------------------------|
-| `app.py`          | CLI loop, argument parsing, rich output          |
-| `llm_client.py`   | Ollama API communication                        |
-| `mcp_client.py`   | MCP PostgreSQL server communication             |
-| `sql_generator.py` | Prompt engineering, SQL extraction, safety checks |
+| Module              | Responsibility                                    |
+|---------------------|---------------------------------------------------|
+| `app.py`            | Streamlit web UI                                  |
+| `llm_client.py`     | Ollama API communication                          |
+| `db_client.py`      | Direct PostgreSQL connection via psycopg2          |
+| `sql_generator.py`  | Prompt engineering, SQL extraction, safety checks  |
+| `profile_manager.py`| Save / load database connection profiles (JSON)    |
 
 ## Prerequisites
 
@@ -40,8 +41,7 @@ User Question (natural language)
   ollama serve &
   ollama pull codellama
   ```
-- **MCP PostgreSQL server** running on `http://localhost:3000`
-- **PostgreSQL** with `pg_stat_statements` enabled
+- **PostgreSQL** database accessible from the machine running pg-assistant
 
 ## Installation
 
@@ -53,54 +53,39 @@ pip install -r requirements.txt
 ## Usage
 
 ```bash
-# Basic usage (defaults: Ollama on :11434, MCP on :3000)
-python app.py
+# Start the Streamlit web UI
+streamlit run app.py
 
-# Custom endpoints
-python app.py --ollama-url http://localhost:11434 --mcp-url http://localhost:3000
-
-# Use a different model
-python app.py --model mistral
+# Or with a custom port
+streamlit run app.py --server.port 8502
+```
 
-# Verbose/debug logging
-python app.py -v
+Then open the URL shown in your terminal (default: `http://localhost:8501`).
 
-# Specify a PostgreSQL schema
-python app.py --schema my_schema
-```
+### Web UI Features
 
-### CLI Commands
+| Feature                | Description                                        |
+|------------------------|----------------------------------------------------|
+| **Ollama Settings**    | Configure Ollama URL and model in the sidebar       |
+| **DB Connection**      | Enter host, port, database, user, password, SSL     |
+| **Connection Profiles**| Save, load, and delete database connection profiles |
+| **Query Tab**          | Type natural language questions, view generated SQL  |
+| **Schema Tab**         | Browse database tables and columns                  |
+| **History Tab**        | Review past queries and results                     |
+| **CSV Export**         | Download query results as CSV                       |
 
-| Command    | Description                          |
-|------------|--------------------------------------|
-| `help`     | Show available commands and examples |
-| `schema`   | Refresh and display database schema  |
-| `clear`    | Clear the terminal screen            |
-| `exit`     | Quit the application                 |
+### Connection Profiles
 
-### Example Session
+Profiles are saved to `~/.pg-assistant/profiles.json`. Each profile stores:
+- Host, port, database name
+- Username and password
+- SSL mode
 
-```
-pg-assistant> Show me the top 5 largest tables
-
-┌─────────────────────────────────────────────────┐
-│ Generated SQL                                   │
-├─────────────────────────────────────────────────┤
-│ SELECT schemaname, relname, n_live_tup           │
-│ FROM pg_stat_user_tables                         │
-│ ORDER BY n_live_tup DESC                         │
-│ LIMIT 5;                                         │
-└─────────────────────────────────────────────────┘
-
-┌─────────────┬──────────┬────────────┐
-│ schemaname  │ relname  │ n_live_tup │
-├─────────────┼──────────┼────────────┤
-│ public      │ orders   │ 1000000    │
-│ public      │ users    │ 500000     │
-│ ...         │ ...      │ ...        │
-└─────────────┴──────────┴────────────┘
-5 row(s) returned in 42ms
-```
+To use profiles:
+1. Fill in connection details in the sidebar
+2. Enter a profile name and click **Save Current Settings**
+3. Next time, select the profile from the **Load Profile** dropdown
+4. Click **Connect** to establish the connection
 
 ## SQL Safety
 
@@ -112,6 +97,6 @@ The assistant enforces **read-only access** by:
 
 ## Schema Awareness
 
-On startup, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy.
+On connection, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy.
 
-Refresh the schema at any time with the `schema` command.
+Refresh the schema at any time via the **Schema** tab.
diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index d1437f6..9bf4edc 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -1,326 +1,350 @@
 #!/usr/bin/env python3
-"""AI-powered PostgreSQL assistant CLI application.
+"""AI-powered PostgreSQL assistant — Streamlit web UI.
 
 Converts natural language questions into SQL queries using a local LLM (Ollama)
-and executes them against a PostgreSQL database via an MCP server.
+and executes them directly against a PostgreSQL database.
 """
 
-import argparse
-import logging
-import sys
 import time
 
-from rich.console import Console
-from rich.logging import RichHandler
-from rich.panel import Panel
-from rich.table import Table
-from rich.text import Text
+import pandas as pd
+import streamlit as st
 
+from db_client import DBClient
 from llm_client import LLMClient
-from mcp_client import MCPClient
+from profile_manager import ProfileManager
 from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError
 
-console = Console()
-
-HELP_TEXT = """
-[bold cyan]Available Commands:[/bold cyan]
-
-  [green]exit[/green] / [green]quit[/green]       Quit the application
-  [green]help[/green]              Show this help message
-  [green]schema[/green]            Refresh and display the database schema
-  [green]clear[/green]             Clear the terminal screen
-
-[bold cyan]Example Questions:[/bold cyan]
-
-  • Show me all tables in the database
-  • What are the top 10 largest tables by row count?
-  • List all active connections to the database
-  • Show the slowest queries from pg_stat_statements
-  • What indexes exist on the users table?
-  • Show me the table structure for the orders table
-"""
-
-BANNER = r"""
-[bold cyan]╔══════════════════════════════════════════════════╗
-║        AI PostgreSQL Assistant (pg-assistant)     ║
-║  Natural Language → SQL via Ollama + MCP Server   ║
-╚══════════════════════════════════════════════════╝[/bold cyan]
-"""
-
-
-def setup_logging(verbose: bool = False) -> None:
-    """Configure logging with rich handler."""
-    level = logging.DEBUG if verbose else logging.INFO
-    logging.basicConfig(
-        level=level,
-        format="%(message)s",
-        datefmt="[%X]",
-        handlers=[RichHandler(console=console, rich_tracebacks=True, show_path=False)],
-    )
-
-
-def parse_args() -> argparse.Namespace:
-    """Parse command-line arguments."""
-    parser = argparse.ArgumentParser(
-        description="AI-powered PostgreSQL assistant using Ollama and MCP",
-    )
-    parser.add_argument(
-        "--ollama-url",
-        default="http://localhost:11434",
-        help="Ollama server URL (default: http://localhost:11434)",
-    )
-    parser.add_argument(
-        "--mcp-url",
-        default="http://localhost:3000",
-        help="MCP PostgreSQL server URL (default: http://localhost:3000)",
-    )
-    parser.add_argument(
-        "--model",
-        default="codellama",
-        help="Ollama model name (default: codellama)",
-    )
-    parser.add_argument(
-        "--schema",
-        default="public",
-        help="PostgreSQL schema to use for context (default: public)",
-    )
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action="store_true",
-        help="Enable verbose/debug logging",
-    )
-    return parser.parse_args()
-
-
-def check_services(llm_client: LLMClient, mcp_client: MCPClient) -> bool:
-    """Verify that Ollama and MCP services are reachable."""
-    all_ok = True
-
-    with console.status("[bold yellow]Checking Ollama server..."):
-        if llm_client.health_check():
-            console.print("  [green]✓[/green] Ollama server is reachable")
-            models = llm_client.list_models()
-            if models:
-                model_names = [m.get("name", "unknown") for m in models]
-                console.print(f"    Available models: {', '.join(model_names)}")
+# ---------------------------------------------------------------------------
+# Page config
+# ---------------------------------------------------------------------------
+st.set_page_config(
+    page_title="PG Assistant",
+    page_icon="🐘",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+
+# ---------------------------------------------------------------------------
+# Session-state defaults
+# ---------------------------------------------------------------------------
+_defaults: dict = {
+    "db_client": None,
+    "llm_client": None,
+    "sql_generator": None,
+    "schema_metadata": None,
+    "query_history": [],
+}
+for _key, _val in _defaults.items():
+    if _key not in st.session_state:
+        st.session_state[_key] = _val
+
+profile_mgr = ProfileManager()
+
+# ---------------------------------------------------------------------------
+# Sidebar — connection & profile management
+# ---------------------------------------------------------------------------
+with st.sidebar:
+    st.title("🐘 PG Assistant")
+    st.caption("AI-powered PostgreSQL query tool")
+    st.divider()
+
+    # --- Ollama settings ---------------------------------------------------
+    st.subheader("🤖 Ollama Settings")
+    ollama_url = st.text_input("Ollama URL", value="http://localhost:11434")
+    ollama_model = st.text_input("Model", value="codellama")
+
+    if st.button("Test Ollama Connection"):
+        test_llm = LLMClient(base_url=ollama_url, model=ollama_model)
+        if test_llm.health_check():
+            models = test_llm.list_models()
+            model_names = [m.get("name", "?") for m in (models or [])]
+            st.success(f"Connected! Models: {', '.join(model_names)}")
         else:
-            console.print(
-                f"  [red]✗[/red] Cannot reach Ollama at {llm_client.base_url}"
-            )
-            all_ok = False
+            st.error(f"Cannot reach Ollama at {ollama_url}")
+
+    st.divider()
+
+    # --- Database connection ------------------------------------------------
+    st.subheader("🗄️ Database Connection")
+
+    saved_profiles = profile_mgr.list_profiles()
+    profile_options = ["-- New Connection --"] + saved_profiles
+    selected_profile = st.selectbox("Load Profile", profile_options)
+
+    profile_data: dict = {}
+    if selected_profile != "-- New Connection --":
+        profile_data = profile_mgr.get_profile(selected_profile) or {}
+
+    col1, col2 = st.columns(2)
+    with col1:
+        db_host = st.text_input("Host", value=profile_data.get("host", "localhost"))
+        db_port = st.number_input(
+            "Port",
+            value=profile_data.get("port", 5432),
+            min_value=1,
+            max_value=65535,
+            step=1,
+        )
+        db_name = st.text_input(
+            "Database", value=profile_data.get("database", "postgres")
+        )
+    with col2:
+        db_user = st.text_input("User", value=profile_data.get("user", "postgres"))
+        db_password = st.text_input(
+            "Password",
+            value=profile_data.get("password", ""),
+            type="password",
+        )
+        db_sslmode = st.selectbox(
+            "SSL Mode",
+            ["prefer", "disable", "require", "verify-ca", "verify-full"],
+            index=[
+                "prefer",
+                "disable",
+                "require",
+                "verify-ca",
+                "verify-full",
+            ].index(profile_data.get("sslmode", "prefer")),
+        )
 
-    with console.status("[bold yellow]Checking MCP server..."):
-        if mcp_client.health_check():
-            console.print("  [green]✓[/green] MCP PostgreSQL server is reachable")
+    if st.button("🔌 Connect", use_container_width=True, type="primary"):
+        try:
+            db = DBClient(
+                host=db_host,
+                port=int(db_port),
+                database=db_name,
+                user=db_user,
+                password=db_password,
+                sslmode=db_sslmode,
+            )
+            db.connect()
+            st.session_state.db_client = db
+
+            llm = LLMClient(base_url=ollama_url, model=ollama_model)
+            st.session_state.llm_client = llm
+            gen = SQLGenerator(llm_client=llm)
+            st.session_state.sql_generator = gen
+
+            schema = db.get_schema()
+            if schema:
+                gen.update_schema(schema)
+                st.session_state.schema_metadata = schema
+
+            st.success(f"Connected to {db.get_connection_info()}")
+        except ConnectionError as exc:
+            st.error(str(exc))
+
+    if st.session_state.db_client and st.session_state.db_client.is_connected:
+        if st.button("Disconnect", use_container_width=True):
+            st.session_state.db_client.disconnect()
+            st.session_state.db_client = None
+            st.session_state.sql_generator = None
+            st.session_state.schema_metadata = None
+            st.rerun()
+
+    st.divider()
+
+    # --- Profile save / delete ----------------------------------------------
+    st.subheader("💾 Save Profile")
+    profile_name = st.text_input("Profile Name", placeholder="e.g. production-db")
+    if st.button("Save Current Settings", use_container_width=True):
+        if not profile_name:
+            st.warning("Enter a profile name first.")
         else:
-            console.print(
-                f"  [red]✗[/red] Cannot reach MCP server at {mcp_client.base_url}"
+            profile_mgr.save_profile(
+                name=profile_name,
+                host=db_host,
+                port=int(db_port),
+                database=db_name,
+                user=db_user,
+                password=db_password,
+                sslmode=db_sslmode,
             )
-            all_ok = False
-
-    return all_ok
-
-
-def load_schema(
-    mcp_client: MCPClient,
-    sql_generator: SQLGenerator,
-    schema_name: str,
-) -> None:
-    """Load and display database schema metadata."""
-    with console.status("[bold yellow]Loading database schema..."):
-        schema = mcp_client.get_schema(schema_name)
-
-    if schema:
-        sql_generator.update_schema(schema)
-        display_schema(schema)
-    else:
-        console.print(
-            "[yellow]⚠ Could not load schema metadata. "
-            "SQL generation will proceed without schema context.[/yellow]"
+            st.success(f"Profile '{profile_name}' saved!")
+            st.rerun()
+
+    if saved_profiles:
+        st.divider()
+        st.subheader("🗑️ Delete Profile")
+        delete_target = st.selectbox(
+            "Select profile", saved_profiles, key="del_profile"
         )
-
-
-def display_schema(schema: dict) -> None:
-    """Render the database schema as a rich table."""
-    table = Table(
-        title="Database Schema",
-        show_header=True,
-        header_style="bold magenta",
+        if st.button("Delete", use_container_width=True):
+            profile_mgr.delete_profile(delete_target)
+            st.success(f"Profile '{delete_target}' deleted.")
+            st.rerun()
+
+# ---------------------------------------------------------------------------
+# Main area
+# ---------------------------------------------------------------------------
+st.header("🐘 AI PostgreSQL Assistant")
+
+if st.session_state.db_client and st.session_state.db_client.is_connected:
+    st.info(
+        f"Connected to **{st.session_state.db_client.get_connection_info()}** "
+        f"| Model: **{ollama_model}**"
     )
-    table.add_column("Table", style="cyan", no_wrap=True)
-    table.add_column("Column", style="green")
-    table.add_column("Type", style="yellow")
-    table.add_column("Nullable", style="dim")
-
-    for table_name, columns in schema.items():
-        for i, col in enumerate(columns):
-            table.add_row(
-                table_name if i == 0 else "",
-                col["column_name"],
-                col["data_type"],
-                col["is_nullable"],
-            )
-        table.add_section()
-
-    console.print(table)
-
-
-def display_results(result: dict) -> None:
-    """Render query results as a rich table."""
-    if "error" in result:
-        console.print(f"\n[red]Query Error:[/red] {result['error']}")
-        return
-
-    columns = result.get("columns", [])
-    rows = result.get("rows", [])
-    row_count = result.get("row_count", len(rows))
-    elapsed_ms = result.get("elapsed_ms", 0)
-
-    if not rows:
-        console.print("\n[yellow]Query returned no results.[/yellow]")
-        return
-
-    table = Table(
-        title="Query Results",
-        show_header=True,
-        header_style="bold magenta",
-        show_lines=True,
-    )
-
-    # Determine column names
-    if columns:
-        col_names = columns
-    elif rows and isinstance(rows[0], dict):
-        col_names = list(rows[0].keys())
-    else:
-        col_names = [f"col_{i}" for i in range(len(rows[0]) if rows else 0)]
-
-    for col_name in col_names:
-        table.add_column(str(col_name), style="cyan", overflow="fold")
-
-    for row in rows:
-        if isinstance(row, dict):
-            table.add_row(*[str(v) if v is not None else "NULL" for v in row.values()])
-        elif isinstance(row, (list, tuple)):
-            table.add_row(*[str(v) if v is not None else "NULL" for v in row])
-
-    console.print(table)
-    console.print(f"\n[dim]{row_count} row(s) returned in {elapsed_ms}ms[/dim]")
-
-
-def process_query(
-    user_input: str,
-    sql_generator: SQLGenerator,
-    mcp_client: MCPClient,
-) -> None:
-    """Process a natural language query end-to-end."""
-    # Step 1: Generate SQL
-    console.print()
-    with console.status("[bold yellow]Generating SQL..."):
-        start_gen = time.monotonic()
-        try:
-            sql = sql_generator.generate_sql(user_input)
-        except UnsafeSQLError as exc:
-            console.print(f"\n[red]Safety Block:[/red] {exc}")
-            return
-        except SQLGenerationError as exc:
-            console.print(f"\n[red]Generation Error:[/red] {exc}")
-            return
-        gen_elapsed = time.monotonic() - start_gen
-
-    # Step 2: Display generated SQL
-    console.print(
-        Panel(
-            Text(sql, style="green"),
-            title="[bold]Generated SQL[/bold]",
-            subtitle=f"[dim]generated in {gen_elapsed:.2f}s[/dim]",
-            border_style="blue",
-        )
+else:
+    st.warning("Not connected to a database. Use the sidebar to connect.")
+
+# ---------------------------------------------------------------------------
+# Tabs
+# ---------------------------------------------------------------------------
+tab_query, tab_schema, tab_history = st.tabs(["💬 Query", "📋 Schema", "📜 History"])
+
+# ---- Query tab ------------------------------------------------------------
+with tab_query:
+    st.subheader("Ask a question in natural language")
+
+    user_question = st.text_area(
+        "Your question",
+        placeholder="e.g. Show me the top 10 largest tables by row count",
+        height=100,
+        label_visibility="collapsed",
     )
 
-    # Step 3: Execute SQL
-    with console.status("[bold yellow]Executing query..."):
-        start_exec = time.monotonic()
-        try:
-            result = mcp_client.execute_query(sql)
-        except (ConnectionError, RuntimeError) as exc:
-            console.print(f"\n[red]Execution Error:[/red] {exc}")
-            return
-        exec_elapsed = time.monotonic() - start_exec
-
-    # Step 4: Display results
-    if "elapsed_ms" not in result:
-        result["elapsed_ms"] = round(exec_elapsed * 1000, 2)
-
-    display_results(result)
-
-
-def main() -> None:
-    """Main CLI entry point."""
-    args = parse_args()
-    setup_logging(verbose=args.verbose)
-
-    console.print(BANNER)
-
-    # Initialize clients
-    llm_client = LLMClient(
-        base_url=args.ollama_url,
-        model=args.model,
-    )
-    mcp_client = MCPClient(base_url=args.mcp_url)
-    sql_generator = SQLGenerator(llm_client=llm_client)
-
-    # Check service connectivity
-    if not check_services(llm_client, mcp_client):
-        console.print(
-            "\n[bold red]Some services are not available. "
-            "Please ensure Ollama and MCP server are running.[/bold red]"
+    col_run, col_examples = st.columns([1, 3])
+    with col_run:
+        run_btn = st.button(
+            "🚀 Run Query",
+            use_container_width=True,
+            type="primary",
+            disabled=not (
+                st.session_state.db_client
+                and st.session_state.db_client.is_connected
+                and user_question.strip()
+            ),
         )
-        console.print(
-            "[dim]Continuing anyway — errors will appear when you submit queries.[/dim]"
-        )
-
-    # Load schema
-    load_schema(mcp_client, sql_generator, args.schema)
-
-    console.print(
-        '\n[dim]Type a natural language question, or "help" for commands.[/dim]\n'
-    )
-
-    # Main REPL loop
-    while True:
-        try:
-            user_input = console.input(
-                "[bold green]pg-assistant>[/bold green] "
-            ).strip()
-        except (KeyboardInterrupt, EOFError):
-            console.print("\n[dim]Goodbye![/dim]")
-            sys.exit(0)
-
-        if not user_input:
-            continue
-
-        command = user_input.lower()
-
-        if command in ("exit", "quit"):
-            console.print("[dim]Goodbye![/dim]")
-            sys.exit(0)
-
-        if command == "help":
-            console.print(HELP_TEXT)
-            continue
-
-        if command == "schema":
-            load_schema(mcp_client, sql_generator, args.schema)
-            continue
-
-        if command == "clear":
-            console.clear()
-            continue
-
-        process_query(user_input, sql_generator, mcp_client)
+    with col_examples:
+        with st.expander("Example questions"):
+            st.markdown(
+                "- Show me all tables in the database\n"
+                "- What are the top 10 largest tables by row count?\n"
+                "- List all active connections to the database\n"
+                "- Show the slowest queries from pg_stat_statements\n"
+                "- What indexes exist on the users table?\n"
+                "- Show database size for each table"
+            )
 
+    if run_btn and user_question.strip():
+        generator = st.session_state.sql_generator
+        db = st.session_state.db_client
 
-if __name__ == "__main__":
-    main()
+        if not generator or not db:
+            st.error("Connect to a database first.")
+        else:
+            with st.spinner("Generating SQL..."):
+                gen_start = time.monotonic()
+                try:
+                    sql = generator.generate_sql(user_question.strip())
+                    gen_elapsed = time.monotonic() - gen_start
+                except UnsafeSQLError as exc:
+                    st.error(f"**Safety Block:** {exc}")
+                    sql = None
+                    gen_elapsed = 0
+                except SQLGenerationError as exc:
+                    st.error(f"**Generation Error:** {exc}")
+                    sql = None
+                    gen_elapsed = 0
+
+            if sql:
+                st.subheader("Generated SQL")
+                st.code(sql, language="sql")
+                st.caption(f"Generated in {gen_elapsed:.2f}s")
+
+                with st.spinner("Executing query..."):
+                    result = db.execute_query(sql)
+
+                if "error" in result:
+                    st.error(f"**Query Error:** {result['error']}")
+                    st.session_state.query_history.append(
+                        {
+                            "question": user_question.strip(),
+                            "sql": sql,
+                            "status": "error",
+                            "error": result["error"],
+                            "elapsed_ms": result.get("elapsed_ms", 0),
+                        }
+                    )
+                else:
+                    rows = result.get("rows", [])
+                    row_count = result.get("row_count", 0)
+                    elapsed_ms = result.get("elapsed_ms", 0)
+
+                    st.subheader("Results")
+                    if rows:
+                        df = pd.DataFrame(rows)
+                        st.dataframe(df, use_container_width=True)
+                        st.caption(f"{row_count} row(s) returned in {elapsed_ms}ms")
+
+                        csv = df.to_csv(index=False)
+                        st.download_button(
+                            "📥 Download CSV",
+                            csv,
+                            file_name="query_results.csv",
+                            mime="text/csv",
+                        )
+                    else:
+                        st.info("Query returned no results.")
+
+                    st.session_state.query_history.append(
+                        {
+                            "question": user_question.strip(),
+                            "sql": sql,
+                            "status": "success",
+                            "row_count": row_count,
+                            "elapsed_ms": elapsed_ms,
+                        }
+                    )
+
+# ---- Schema tab -----------------------------------------------------------
+with tab_schema:
+    st.subheader("Database Schema")
+
+    if st.session_state.db_client and st.session_state.db_client.is_connected:
+        if st.button("🔄 Refresh Schema"):
+            schema = st.session_state.db_client.get_schema()
+            if schema:
+                if st.session_state.sql_generator:
+                    st.session_state.sql_generator.update_schema(schema)
+                st.session_state.schema_metadata = schema
+                st.success("Schema refreshed!")
+            else:
+                st.warning("Could not load schema.")
+
+        schema = st.session_state.schema_metadata
+        if schema:
+            st.caption(f"{len(schema)} table(s) found")
+            for table_name, columns in schema.items():
+                with st.expander(f"📋 {table_name} ({len(columns)} columns)"):
+                    col_df = pd.DataFrame(columns)
+                    st.dataframe(col_df, use_container_width=True, hide_index=True)
+        else:
+            st.info("No schema loaded. Click 'Refresh Schema' to load.")
+    else:
+        st.warning("Connect to a database first.")
+
+# ---- History tab ----------------------------------------------------------
+with tab_history:
+    st.subheader("Query History")
+
+    history = st.session_state.query_history
+    if history:
+        if st.button("🗑️ Clear History"):
+            st.session_state.query_history = []
+            st.rerun()
+
+        for _i, entry in enumerate(reversed(history), 1):
+            status_label = "[OK]" if entry["status"] == "success" else "[ERR]"
+            with st.expander(f"{status_label} {entry['question'][:80]}"):
+                st.code(entry["sql"], language="sql")
+                if entry["status"] == "success":
+                    st.caption(
+                        f"{entry.get('row_count', 0)} rows | "
+                        f"{entry.get('elapsed_ms', 0)}ms"
+                    )
+                else:
+                    st.error(entry.get("error", "Unknown error"))
+    else:
+        st.info("No queries yet. Ask a question in the Query tab!")
diff --git a/tools/pg-assistant/db_client.py b/tools/pg-assistant/db_client.py
new file mode 100644
index 0000000..8357f86
--- /dev/null
+++ b/tools/pg-assistant/db_client.py
@@ -0,0 +1,174 @@
+"""Direct PostgreSQL database client using psycopg2."""
+
+import logging
+import time
+from typing import Any, Optional
+
+import psycopg2
+import psycopg2.extras
+
+logger = logging.getLogger(__name__)
+
+
+class DBClient:
+    """Client for direct PostgreSQL database connections."""
+
+    def __init__(
+        self,
+        host: str,
+        port: int,
+        database: str,
+        user: str,
+        password: str,
+        sslmode: str = "prefer",
+    ) -> None:
+        self.conn_params = {
+            "host": host,
+            "port": port,
+            "dbname": database,
+            "user": user,
+            "password": password,
+            "sslmode": sslmode,
+        }
+        self._conn: Optional[psycopg2.extensions.connection] = None
+
+    def connect(self) -> None:
+        """Establish a connection to PostgreSQL.
+
+        Raises:
+            ConnectionError: If the database is unreachable.
+        """
+        try:
+            self._conn = psycopg2.connect(**self.conn_params)
+            self._conn.autocommit = True
+            logger.info(
+                "Connected to PostgreSQL at %s:%s/%s",
+                self.conn_params["host"],
+                self.conn_params["port"],
+                self.conn_params["dbname"],
+            )
+        except psycopg2.OperationalError as exc:
+            logger.error("Failed to connect to PostgreSQL: %s", exc)
+            raise ConnectionError(f"Cannot connect to PostgreSQL: {exc}") from exc
+
+    def disconnect(self) -> None:
+        """Close the database connection."""
+        if self._conn and not self._conn.closed:
+            self._conn.close()
+            logger.info("Disconnected from PostgreSQL")
+
+    @property
+    def is_connected(self) -> bool:
+        """Check whether the connection is active."""
+        if self._conn is None or self._conn.closed:
+            return False
+        try:
+            with self._conn.cursor() as cur:
+                cur.execute("SELECT 1")
+            return True
+        except psycopg2.Error:
+            return False
+
+    def execute_query(self, sql: str) -> dict[str, Any]:
+        """Execute a SQL query and return results.
+
+        Args:
+            sql: The SQL query string to execute.
+
+        Returns:
+            A dict with 'columns', 'rows', 'row_count', and 'elapsed_ms' on
+            success, or with 'error' and 'elapsed_ms' if the query fails.
+
+        Raises:
+            ConnectionError: If not connected to the database.
+        """
+        if not self.is_connected:
+            raise ConnectionError("Not connected to PostgreSQL. Please connect first.")
+
+        start = time.monotonic()
+        try:
+            with self._conn.cursor(
+                cursor_factory=psycopg2.extras.RealDictCursor
+            ) as cur:
+                cur.execute(sql)
+                columns = (
+                    [desc[0] for desc in cur.description] if cur.description else []
+                )
+                rows = cur.fetchall() if cur.description else []
+                elapsed = time.monotonic() - start
+                return {
+                    "columns": columns,
+                    "rows": [dict(row) for row in rows],
+                    "row_count": len(rows),
+                    "elapsed_ms": round(elapsed * 1000, 2),
+                }
+        except psycopg2.Error as exc:
+            elapsed = time.monotonic() - start
+            logger.error("Query execution failed: %s", exc)
+            return {
+                "error": str(exc).strip(),
+                "elapsed_ms": round(elapsed * 1000, 2),
+            }
+
+    def get_schema(
+        self, schema_name: str = "public"
+    ) -> Optional[dict[str, list[dict[str, str]]]]:
+        """Retrieve database schema metadata.
+
+        Args:
+            schema_name: The PostgreSQL schema to inspect.
+
+        Returns:
+            Schema metadata dict mapping table names to column info lists,
+            or None on failure.
+        """
+        sql = """
+            SELECT
+                t.table_name,
+                c.column_name,
+                c.data_type,
+                c.is_nullable,
+                c.column_default
+            FROM information_schema.tables t
+            JOIN information_schema.columns c
+                ON t.table_name = c.table_name
+                AND t.table_schema = c.table_schema
+            WHERE t.table_schema = %s
+                AND t.table_type = 'BASE TABLE'
+            ORDER BY t.table_name, c.ordinal_position;
+        """
+        if not self.is_connected:
+            return None
+
+        try:
+            with self._conn.cursor(
+                cursor_factory=psycopg2.extras.RealDictCursor
+            ) as cur:
+                cur.execute(sql, (schema_name,))
+                rows = cur.fetchall()
+        except psycopg2.Error as exc:
+            logger.warning("Failed to fetch schema: %s", exc)
+            return None
+
+        schema: dict[str, list[dict[str, str]]] = {}
+        for row in rows:
+            table = row["table_name"]
+            col_info = {
+                "column_name": row["column_name"],
+                "data_type": row["data_type"],
+                "is_nullable": row["is_nullable"],
+                "column_default": row["column_default"] or "",
+            }
+            if table not in schema:
+                schema[table] = []
+            schema[table].append(col_info)
+
+        return schema
+
+    def get_connection_info(self) -> str:
+        """Return a display-friendly connection string (password omitted)."""
+        return (
+            f"{self.conn_params['user']}@"
+            f"{self.conn_params['host']}:{self.conn_params['port']}/"
+            f"{self.conn_params['dbname']}"
+        )
diff --git a/tools/pg-assistant/profile_manager.py b/tools/pg-assistant/profile_manager.py
new file mode 100644
index 0000000..456d65b
--- /dev/null
+++ b/tools/pg-assistant/profile_manager.py
@@ -0,0 +1,113 @@
+"""Database connection profile manager — save and load profiles as JSON."""
+
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_PROFILES_DIR = os.path.join(str(Path.home()), ".pg-assistant")
+PROFILES_FILE = "profiles.json"
+
+
+class ProfileManager:
+    """Manages saved database connection profiles."""
+
+    def __init__(self, profiles_dir: str = DEFAULT_PROFILES_DIR) -> None:
+        self.profiles_dir = profiles_dir
+        self.profiles_path = os.path.join(profiles_dir, PROFILES_FILE)
+        self._ensure_dir()
+
+    def _ensure_dir(self) -> None:
+        """Create the profiles directory if it doesn't exist."""
+        os.makedirs(self.profiles_dir, exist_ok=True)
+
+    def _load_all(self) -> dict[str, dict[str, Any]]:
+        """Load all profiles from disk."""
+        if not os.path.exists(self.profiles_path):
+            return {}
+        try:
+            with open(self.profiles_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if isinstance(data, dict):
+                return data
+        except (json.JSONDecodeError, OSError) as exc:
+            logger.warning("Failed to load profiles: %s", exc)
+        return {}
+
+    def _save_all(self, profiles: dict[str, dict[str, Any]]) -> None:
+        """Save all profiles to disk."""
+        try:
+            with open(self.profiles_path, "w", encoding="utf-8") as f:
+                json.dump(profiles, f, indent=2)
+            logger.info("Profiles saved to %s", self.profiles_path)
+        except OSError as exc:
+            logger.error("Failed to save profiles: %s", exc)
+
+    def list_profiles(self) -> list[str]:
+        """Return a list of saved profile names."""
+        return list(self._load_all().keys())
+
+    def get_profile(self, name: str) -> Optional[dict[str, Any]]:
+        """Retrieve a saved profile by name.
+
+        Args:
+            name: The profile name.
+
+        Returns:
+            A dict with connection parameters, or None if not found.
+        """
+        profiles = self._load_all()
+        return profiles.get(name)
+
+    def save_profile(
+        self,
+        name: str,
+        host: str,
+        port: int,
+        database: str,
+        user: str,
+        password: str,
+        sslmode: str = "prefer",
+    ) -> None:
+        """Save a database connection profile.
+
+        Args:
+            name: A friendly name for the profile.
+            host: PostgreSQL host.
+            port: PostgreSQL port.
+            database: Database name.
+            user: Database user.
+            password: Database password.
+            sslmode: SSL mode (default: prefer).
+        """
+        profiles = self._load_all()
+        profiles[name] = {
+            "host": host,
+            "port": port,
+            "database": database,
+            "user": user,
+            "password": password,
+            "sslmode": sslmode,
+        }
+        self._save_all(profiles)
+        logger.info("Profile '%s' saved", name)
+
+    def delete_profile(self, name: str) -> bool:
+        """Delete a saved profile.
+
+        Args:
+            name: The profile name to delete.
+
+        Returns:
+            True if deleted, False if not found.
+        """
+        profiles = self._load_all()
+        if name in profiles:
+            del profiles[name]
+            self._save_all(profiles)
+            logger.info("Profile '%s' deleted", name)
+            return True
+        return False
diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt
index 8f33262..7ef45a6 100644
--- a/tools/pg-assistant/requirements.txt
+++ b/tools/pg-assistant/requirements.txt
@@ -1,2 +1,4 @@
 requests>=2.31.0,<3.0.0
-rich>=13.7.0,<15.0.0
+psycopg2-binary>=2.9.0,<3.0.0
+streamlit>=1.28.0,<2.0.0
+pandas>=2.0.0,<3.0.0

From ac1a5e7e0415fdb7ee2a904017a3010f774a39d7 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 4 Apr 2026 17:07:16 +0000
Subject: [PATCH 03/19] Remove unused mcp_client.py (replaced by db_client.py)

---
 tools/pg-assistant/mcp_client.py | 174 -------------------------------
 1 file changed, 174 deletions(-)
 delete mode 100644 tools/pg-assistant/mcp_client.py

diff --git a/tools/pg-assistant/mcp_client.py b/tools/pg-assistant/mcp_client.py
deleted file mode 100644
index e4bc230..0000000
--- a/tools/pg-assistant/mcp_client.py
+++ /dev/null
@@ -1,174 +0,0 @@
-"""MCP (Model Context Protocol) client for PostgreSQL server communication."""
-
-import logging
-import time
-from typing import Any, Optional
-
-import requests
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_MCP_URL = "http://localhost:3000"
-DEFAULT_TIMEOUT = 30
-
-
-class MCPClient:
-    """Client for interacting with the MCP PostgreSQL server."""
-
-    def __init__(
-        self,
-        base_url: str = DEFAULT_MCP_URL,
-        timeout: int = DEFAULT_TIMEOUT,
-    ) -> None:
-        self.base_url = base_url.rstrip("/")
-        self.timeout = timeout
-
-    def execute_query(self, sql: str) -> dict[str, Any]:
-        """Execute a SQL query via the MCP server.
-
-        Args:
-            sql: The SQL query string to execute.
-
-        Returns:
-            A dict with keys 'columns' and 'rows' on success,
-            or 'error' on failure.
-
-        Raises:
-            ConnectionError: If the MCP server is unreachable.
-            RuntimeError: If the MCP server returns an error response.
-        """
-        payload = {
-            "method": "query",
-            "params": {"sql": sql},
-        }
-
-        logger.debug("Sending query to MCP server: %s", sql[:200])
-        start = time.monotonic()
-
-        try:
-            response = requests.post(
-                self.base_url,
-                json=payload,
-                timeout=self.timeout,
-            )
-        except requests.ConnectionError as exc:
-            logger.error("Cannot reach MCP server at %s", self.base_url)
-            raise ConnectionError(
-                f"Cannot connect to MCP server at {self.base_url}. "
-                "Ensure the MCP PostgreSQL server is running."
-            ) from exc
-        except requests.Timeout as exc:
-            logger.error("MCP request timed out after %ds", self.timeout)
-            raise RuntimeError(
-                f"MCP server request timed out after {self.timeout}s."
-            ) from exc
-
-        elapsed = time.monotonic() - start
-        logger.debug("MCP server responded in %.2fs", elapsed)
-
-        if response.status_code != 200:
-            error_detail = response.text[:500]
-            logger.error("MCP server error %d: %s", response.status_code, error_detail)
-            raise RuntimeError(
-                f"MCP server returned status {response.status_code}: {error_detail}"
-            )
-
-        data: dict[str, Any] = response.json()
-
-        if "error" in data:
-            error_msg = data["error"]
-            logger.warning("MCP query error: %s", error_msg)
-            return {"error": str(error_msg)}
-
-        return {
-            "columns": data.get("columns", []),
-            "rows": data.get("rows", []),
-            "row_count": data.get("rowCount", len(data.get("rows", []))),
-            "elapsed_ms": round(elapsed * 1000, 2),
-        }
-
-    def get_schema(self, schema_name: str = "public") -> Optional[dict[str, Any]]:
-        """Retrieve database schema metadata via the MCP server.
-
-        Args:
-            schema_name: The PostgreSQL schema to inspect.
-
-        Returns:
-            Schema metadata dict, or None on failure.
-        """
-        sql = f"""
-            SELECT
-                t.table_name,
-                c.column_name,
-                c.data_type,
-                c.is_nullable,
-                c.column_default
-            FROM information_schema.tables t
-            JOIN information_schema.columns c
-                ON t.table_name = c.table_name
-                AND t.table_schema = c.table_schema
-            WHERE t.table_schema = '{schema_name}'
-                AND t.table_type = 'BASE TABLE'
-            ORDER BY t.table_name, c.ordinal_position;
-        """
-        try:
-            result = self.execute_query(sql)
-            if "error" in result:
-                logger.warning("Failed to fetch schema: %s", result["error"])
-                return None
-            return self._parse_schema(result)
-        except (ConnectionError, RuntimeError) as exc:
-            logger.warning("Failed to fetch schema: %s", exc)
-            return None
-
-    def health_check(self) -> bool:
-        """Check whether the MCP server is reachable.
-
-        Returns:
-            True if the server responds, False otherwise.
-        """
-        try:
-            response = requests.get(self.base_url, timeout=5)
-            return response.status_code < 500
-        except (requests.ConnectionError, requests.Timeout):
-            return False
-
-    @staticmethod
-    def _parse_schema(result: dict[str, Any]) -> dict[str, list[dict[str, str]]]:
-        """Parse raw schema query results into a structured dict.
-
-        Args:
-            result: The raw query result from execute_query.
-
-        Returns:
-            A dict mapping table names to lists of column info dicts.
-        """
-        schema: dict[str, list[dict[str, str]]] = {}
-        columns = result.get("columns", [])
-        rows = result.get("rows", [])
-
-        for row in rows:
-            if isinstance(row, dict):
-                table = row.get("table_name", "")
-                col_info = {
-                    "column_name": row.get("column_name", ""),
-                    "data_type": row.get("data_type", ""),
-                    "is_nullable": row.get("is_nullable", ""),
-                    "column_default": row.get("column_default", ""),
-                }
-            elif isinstance(row, (list, tuple)) and len(columns) >= 5:
-                table = str(row[0])
-                col_info = {
-                    "column_name": str(row[1]),
-                    "data_type": str(row[2]),
-                    "is_nullable": str(row[3]),
-                    "column_default": str(row[4]) if row[4] else "",
-                }
-            else:
-                continue
-
-            if table not in schema:
-                schema[table] = []
-            schema[table].append(col_info)
-
-        return schema

From 4f7c457eb468e1a65e055159f061da299c6ffdd8 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sat, 4 Apr 2026 17:29:14 +0000
Subject: [PATCH 04/19] Add multi-DB support (Oracle + PostgreSQL),
 auto-monitor, auto-analyse

- Refactor db_client.py with abstract BaseDBClient, PostgreSQLClient, OracleClient
- Add oracledb driver support (thin mode, no Oracle Client needed)
- Add db_type dropdown in profile manager and connection sidebar
- Add auto_monitor.py: periodic tablespace monitoring, auto-extend datafiles (max 20GB/file)
- Add auto_analyse.py: AWR/pg_stat_statements analysis with LLM summary + action plan
- Update sql_generator.py for dual-DB SQL dialects
- Update Streamlit UI with Auto Monitor and Auto Analyse tabs
- Update requirements.txt with oracledb dependency
- Update README.md with new architecture and features
---
 tools/pg-assistant/README.md          | 181 ++++++++-----
 tools/pg-assistant/app.py             | 370 ++++++++++++++++++++++----
 tools/pg-assistant/auto_analyse.py    | 280 +++++++++++++++++++
 tools/pg-assistant/auto_monitor.py    | 350 ++++++++++++++++++++++++
 tools/pg-assistant/db_client.py       | 318 ++++++++++++++++++----
 tools/pg-assistant/profile_manager.py |  30 +--
 tools/pg-assistant/requirements.txt   |   1 +
 tools/pg-assistant/sql_generator.py   |  58 ++--
 8 files changed, 1380 insertions(+), 208 deletions(-)
 create mode 100644 tools/pg-assistant/auto_analyse.py
 create mode 100644 tools/pg-assistant/auto_monitor.py

diff --git a/tools/pg-assistant/README.md b/tools/pg-assistant/README.md
index 7d240c9..e8c3ca9 100644
--- a/tools/pg-assistant/README.md
+++ b/tools/pg-assistant/README.md
@@ -1,47 +1,62 @@
-# pg-assistant — AI-Powered PostgreSQL Assistant
+# DB Assistant (pg-assistant)
 
-A Streamlit web UI that converts natural language questions into SQL queries using a local LLM (Ollama) and executes them directly against PostgreSQL. Includes connection profile management for saving and loading database configurations.
+AI-powered database assistant that converts natural language questions into SQL
+queries using a local LLM (Ollama) and executes them against **PostgreSQL** or
+**Oracle** databases via a Streamlit web UI.
 
 ## Architecture
 
 ```
-User Question (natural language)
-        │
-        ▼
-┌──────────────────┐
-│  sql_generator   │  ← Prompt engineering + safety validation
-│                  │
-│  ┌────────────┐  │
-│  │ llm_client │──┼──→ Ollama API (codellama)
-│  └────────────┘  │
-└────────┬─────────┘
-         │ validated SELECT query
-         ▼
-┌──────────────────┐
-│    db_client     │──→ PostgreSQL (direct via psycopg2)
-└──────────────────┘
-         │
-         ▼
-   Streamlit Web UI (tables, charts, CSV export)
+┌──────────────────────────────────────────────────────┐
+│                  Streamlit Web UI                     │
+│  (app.py)                                            │
+│  ┌──────────┬──────────┬───────────┬───────────────┐ │
+│  │  Query   │  Schema  │  Auto     │  Auto         │ │
+│  │  Tab     │  Tab     │  Monitor  │  Analyse      │ │
+│  └──────────┴──────────┴───────────┴───────────────┘ │
+└──────────┬──────────────┬──────────────┬─────────────┘
+           │              │              │
+    ┌──────▼──────┐ ┌─────▼─────┐ ┌─────▼──────┐
+    │ sql_generator│ │ auto_     │ │ auto_      │
+    │ .py         │ │ monitor.py│ │ analyse.py │
+    └──────┬──────┘ └─────┬─────┘ └─────┬──────┘
+           │              │              │
+    ┌──────▼──────┐       │       ┌──────▼──────┐
+    │ llm_client  │       │       │ llm_client  │
+    │ .py (Ollama)│       │       │ .py (Ollama)│
+    └─────────────┘       │       └─────────────┘
+                          │
+              ┌───────────▼───────────┐
+              │     db_client.py      │
+              │  ┌─────────┬────────┐ │
+              │  │ Postgre │ Oracle │ │
+              │  │ SQL     │ Client │ │
+              │  │ Client  │        │ │
+              │  └─────────┴────────┘ │
+              └───────────────────────┘
+              ┌───────────────────────┐
+              │  profile_manager.py   │
+              │  (~/.pg-assistant/    │
+              │   profiles.json)      │
+              └───────────────────────┘
 ```
 
-| Module              | Responsibility                                    |
-|---------------------|---------------------------------------------------|
-| `app.py`            | Streamlit web UI                                  |
-| `llm_client.py`     | Ollama API communication                          |
-| `db_client.py`      | Direct PostgreSQL connection via psycopg2          |
-| `sql_generator.py`  | Prompt engineering, SQL extraction, safety checks  |
-| `profile_manager.py`| Save / load database connection profiles (JSON)    |
+| Module              | Purpose                                              |
+|---------------------|------------------------------------------------------|
+| `app.py`            | Streamlit web UI — tabs for Query, Schema, Monitor, Analyse, History |
+| `db_client.py`      | Abstract DB client with PostgreSQL (psycopg2) and Oracle (oracledb) implementations |
+| `llm_client.py`     | Ollama REST API client (`/api/generate`)              |
+| `sql_generator.py`  | Prompt engineering, SQL extraction, safety validation, retry logic |
+| `profile_manager.py`| Save/load/delete connection profiles as JSON          |
+| `auto_monitor.py`   | Periodic tablespace monitoring, auto-extend datafiles (Oracle) |
+| `auto_analyse.py`   | AWR/V$ (Oracle) and pg_stat_statements (PG) analysis with LLM summary |
 
 ## Prerequisites
 
 - **Python 3.10+**
-- **Ollama** running locally with the `codellama` model pulled:
-  ```bash
-  ollama serve &
-  ollama pull codellama
-  ```
-- **PostgreSQL** database accessible from the machine running pg-assistant
+- **Ollama** running locally with a model (e.g. `codellama`)
+- **PostgreSQL** and/or **Oracle** database accessible from this machine
+- For Oracle: `oracledb` uses thin mode (no Oracle Client installation needed)
 
 ## Installation
 
@@ -50,53 +65,87 @@ cd tools/pg-assistant
 pip install -r requirements.txt
 ```
 
+### Dependencies
+
+| Package           | Purpose                    |
+|-------------------|----------------------------|
+| `requests`        | Ollama HTTP API calls      |
+| `psycopg2-binary` | PostgreSQL driver          |
+| `oracledb`        | Oracle driver (thin mode)  |
+| `streamlit`       | Web UI framework           |
+| `pandas`          | DataFrame display & CSV    |
+
+> You only need the driver for the database(s) you plan to connect to.
+> If you only use PostgreSQL, `oracledb` is optional (and vice versa).
+
 ## Usage
 
 ```bash
-# Start the Streamlit web UI
 streamlit run app.py
-
-# Or with a custom port
-streamlit run app.py --server.port 8502
 ```
 
-Then open the URL shown in your terminal (default: `http://localhost:8501`).
+Then open **http://localhost:8501** in your browser.
 
 ### Web UI Features
 
-| Feature                | Description                                        |
-|------------------------|----------------------------------------------------|
-| **Ollama Settings**    | Configure Ollama URL and model in the sidebar       |
-| **DB Connection**      | Enter host, port, database, user, password, SSL     |
-| **Connection Profiles**| Save, load, and delete database connection profiles |
-| **Query Tab**          | Type natural language questions, view generated SQL  |
-| **Schema Tab**         | Browse database tables and columns                  |
-| **History Tab**        | Review past queries and results                     |
-| **CSV Export**         | Download query results as CSV                       |
-
-### Connection Profiles
-
-Profiles are saved to `~/.pg-assistant/profiles.json`. Each profile stores:
-- Host, port, database name
-- Username and password
-- SSL mode
-
-To use profiles:
-1. Fill in connection details in the sidebar
-2. Enter a profile name and click **Save Current Settings**
-3. Next time, select the profile from the **Load Profile** dropdown
-4. Click **Connect** to establish the connection
+1. **Sidebar** — Configure Ollama URL/model, select database type
+   (PostgreSQL or Oracle), enter connection details, save/load/delete profiles.
+
+2. **Query Tab** — Type a natural language question, see the generated SQL,
+   review results in an interactive table, download as CSV.
+
+3. **Schema Tab** — Browse database schema with expandable table views.
+
+4. **Auto Monitor Tab** — Configure threshold, interval, and max file size.
+   Start periodic monitoring or run a one-time check.
+   - **Oracle**: Monitors tablespace usage via `DBA_DATA_FILES` / `DBA_FREE_SPACE`.
+     Automatically enables autoextend or adds datafiles when usage exceeds threshold
+     (max 20 GB per file by default).
+   - **PostgreSQL**: Reports tablespace, database, and table sizes.
+
+5. **Auto Analyse Tab** — Collect performance data and generate an AI-powered
+   summary with action plan.
+   - **Oracle**: Queries `V$SQL`, `V$SYSTEM_EVENT`, `V$SYSSTAT`, `V$SGAINFO`,
+     `V$FILESTAT` for performance metrics.
+   - **PostgreSQL**: Queries `pg_stat_statements`, `pg_stat_user_tables`,
+     `pg_stat_database`, `pg_stat_bgwriter`, `pg_stat_user_indexes`.
+
+6. **History Tab** — Review past queries with status, row counts, and timing.
+
+## Connection Profiles
+
+Profiles are stored in `~/.pg-assistant/profiles.json` and include:
+
+| Field          | Description                     |
+|----------------|---------------------------------|
+| `db_type`      | `postgresql` or `oracle`        |
+| `host`         | Database hostname               |
+| `port`         | Database port                   |
+| `database`     | Database name (PostgreSQL)      |
+| `service_name` | Service name (Oracle)           |
+| `user`         | Database username               |
+| `password`     | Database password (plaintext)   |
+| `sslmode`      | SSL mode (PostgreSQL only)      |
+
+> **Security note**: Passwords are stored in plaintext. Use file-system
+> permissions to restrict access, or consider integrating with a secrets
+> manager for production use.
 
 ## SQL Safety
 
-The assistant enforces **read-only access** by:
+The SQL generator blocks dangerous keywords before execution:
+
+`DROP`, `DELETE`, `TRUNCATE`, `UPDATE`, `INSERT`, `ALTER`, `CREATE`,
+`GRANT`, `REVOKE`, `EXEC`, `EXECUTE`
 
-1. Blocking dangerous keywords: `DROP`, `DELETE`, `TRUNCATE`, `UPDATE`, `INSERT`, `ALTER`, `CREATE`, `GRANT`, `REVOKE`, `EXEC`, `EXECUTE`
-2. Requiring queries to start with `SELECT` or `WITH` (CTEs)
-3. Stripping string literals before keyword scanning to avoid false positives
+Only `SELECT` and `WITH` (CTE) queries are allowed through the natural
+language query path. The auto-monitor uses a separate internal path for
+administrative DDL (e.g. `ALTER TABLESPACE`).
 
 ## Schema Awareness
 
-On connection, the assistant fetches `information_schema` metadata and injects it into every LLM prompt. This provides the model with table names, column names, data types, and constraints — significantly improving SQL generation accuracy.
+On connection, the tool fetches schema metadata and injects it into every
+LLM prompt so the model generates accurate, table-aware SQL.
 
-Refresh the schema at any time via the **Schema** tab.
+- **PostgreSQL**: Queries `information_schema.tables` / `information_schema.columns`
+- **Oracle**: Queries `ALL_TAB_COLUMNS`
diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index 9bf4edc..d830787 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
-"""AI-powered PostgreSQL assistant — Streamlit web UI.
+"""AI-powered database assistant -- Streamlit web UI.
 
 Converts natural language questions into SQL queries using a local LLM (Ollama)
-and executes them directly against a PostgreSQL database.
+and executes them directly against PostgreSQL or Oracle databases.
 """
 
 import time
@@ -10,7 +10,14 @@
 import pandas as pd
 import streamlit as st
 
-from db_client import DBClient
+from auto_analyse import PerformanceAnalyser
+from auto_monitor import TablespaceMonitor
+from db_client import (
+    DB_TYPE_ORACLE,
+    DB_TYPE_POSTGRESQL,
+    BaseDBClient,
+    create_db_client,
+)
 from llm_client import LLMClient
 from profile_manager import ProfileManager
 from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError
@@ -19,8 +26,8 @@
 # Page config
 # ---------------------------------------------------------------------------
 st.set_page_config(
-    page_title="PG Assistant",
-    page_icon="🐘",
+    page_title="DB Assistant",
+    page_icon="🛢️",
     layout="wide",
     initial_sidebar_state="expanded",
 )
@@ -34,6 +41,8 @@
     "sql_generator": None,
     "schema_metadata": None,
     "query_history": [],
+    "monitor": None,
+    "analyser": None,
 }
 for _key, _val in _defaults.items():
     if _key not in st.session_state:
@@ -42,11 +51,23 @@
 profile_mgr = ProfileManager()
 
 # ---------------------------------------------------------------------------
-# Sidebar — connection & profile management
+# Helper: current db_type from connected client
+# ---------------------------------------------------------------------------
+
+
+def _connected_db_type() -> str:
+    client: BaseDBClient | None = st.session_state.db_client
+    if client and client.is_connected:
+        return client.db_type
+    return ""
+
+
+# ---------------------------------------------------------------------------
+# Sidebar -- connection & profile management
 # ---------------------------------------------------------------------------
 with st.sidebar:
-    st.title("🐘 PG Assistant")
-    st.caption("AI-powered PostgreSQL query tool")
+    st.title("🛢️ DB Assistant")
+    st.caption("AI-powered PostgreSQL & Oracle query tool")
     st.divider()
 
     # --- Ollama settings ---------------------------------------------------
@@ -68,6 +89,10 @@
     # --- Database connection ------------------------------------------------
     st.subheader("🗄️ Database Connection")
 
+    db_type_options = ["PostgreSQL", "Oracle"]
+    db_type_map = {"PostgreSQL": DB_TYPE_POSTGRESQL, "Oracle": DB_TYPE_ORACLE}
+    reverse_map = {v: k for k, v in db_type_map.items()}
+
     saved_profiles = profile_mgr.list_profiles()
     profile_options = ["-- New Connection --"] + saved_profiles
     selected_profile = st.selectbox("Load Profile", profile_options)
@@ -76,54 +101,81 @@
     if selected_profile != "-- New Connection --":
         profile_data = profile_mgr.get_profile(selected_profile) or {}
 
+    profile_db_type = profile_data.get("db_type", DB_TYPE_POSTGRESQL)
+    default_type_idx = db_type_options.index(
+        reverse_map.get(profile_db_type, "PostgreSQL")
+    )
+    selected_db_label = st.selectbox(
+        "Database Type", db_type_options, index=default_type_idx
+    )
+    selected_db_type = db_type_map[selected_db_label]
+
     col1, col2 = st.columns(2)
     with col1:
         db_host = st.text_input("Host", value=profile_data.get("host", "localhost"))
         db_port = st.number_input(
             "Port",
-            value=profile_data.get("port", 5432),
+            value=profile_data.get(
+                "port", 5432 if selected_db_type == DB_TYPE_POSTGRESQL else 1521
+            ),
             min_value=1,
             max_value=65535,
             step=1,
         )
-        db_name = st.text_input(
-            "Database", value=profile_data.get("database", "postgres")
-        )
+        if selected_db_type == DB_TYPE_POSTGRESQL:
+            db_name = st.text_input(
+                "Database", value=profile_data.get("database", "postgres")
+            )
+        else:
+            db_service = st.text_input(
+                "Service Name", value=profile_data.get("service_name", "ORCL")
+            )
     with col2:
-        db_user = st.text_input("User", value=profile_data.get("user", "postgres"))
+        db_user = st.text_input(
+            "User",
+            value=profile_data.get(
+                "user", "postgres" if selected_db_type == DB_TYPE_POSTGRESQL else ""
+            ),
+        )
         db_password = st.text_input(
             "Password",
             value=profile_data.get("password", ""),
             type="password",
         )
-        db_sslmode = st.selectbox(
-            "SSL Mode",
-            ["prefer", "disable", "require", "verify-ca", "verify-full"],
-            index=[
-                "prefer",
-                "disable",
-                "require",
-                "verify-ca",
-                "verify-full",
-            ].index(profile_data.get("sslmode", "prefer")),
-        )
+        if selected_db_type == DB_TYPE_POSTGRESQL:
+            db_sslmode = st.selectbox(
+                "SSL Mode",
+                ["prefer", "disable", "require", "verify-ca", "verify-full"],
+                index=[
+                    "prefer",
+                    "disable",
+                    "require",
+                    "verify-ca",
+                    "verify-full",
+                ].index(profile_data.get("sslmode", "prefer")),
+            )
 
     if st.button("🔌 Connect", use_container_width=True, type="primary"):
         try:
-            db = DBClient(
-                host=db_host,
-                port=int(db_port),
-                database=db_name,
-                user=db_user,
-                password=db_password,
-                sslmode=db_sslmode,
-            )
+            conn_kwargs: dict = {
+                "host": db_host,
+                "port": int(db_port),
+                "user": db_user,
+                "password": db_password,
+            }
+            if selected_db_type == DB_TYPE_POSTGRESQL:
+                conn_kwargs["database"] = db_name
+                conn_kwargs["sslmode"] = db_sslmode
+            else:
+                conn_kwargs["service_name"] = db_service
+
+            db = create_db_client(selected_db_type, **conn_kwargs)
             db.connect()
             st.session_state.db_client = db
 
             llm = LLMClient(base_url=ollama_url, model=ollama_model)
             st.session_state.llm_client = llm
-            gen = SQLGenerator(llm_client=llm)
+            gen = SQLGenerator(llm_client=llm, db_type=selected_db_type)
             st.session_state.sql_generator = gen
 
             schema = db.get_schema()
@@ -131,16 +183,23 @@
                 gen.update_schema(schema)
                 st.session_state.schema_metadata = schema
 
+            st.session_state.monitor = None
+            st.session_state.analyser = None
+
             st.success(f"Connected to {db.get_connection_info()}")
-        except ConnectionError as exc:
+        except (ConnectionError, ImportError) as exc:
             st.error(str(exc))
 
     if st.session_state.db_client and st.session_state.db_client.is_connected:
         if st.button("Disconnect", use_container_width=True):
+            if st.session_state.monitor:
+                st.session_state.monitor.stop()
             st.session_state.db_client.disconnect()
             st.session_state.db_client = None
             st.session_state.sql_generator = None
             st.session_state.schema_metadata = None
+            st.session_state.monitor = None
+            st.session_state.analyser = None
             st.rerun()
 
     st.divider()
@@ -152,15 +211,20 @@
         if not profile_name:
             st.warning("Enter a profile name first.")
         else:
-            profile_mgr.save_profile(
-                name=profile_name,
-                host=db_host,
-                port=int(db_port),
-                database=db_name,
-                user=db_user,
-                password=db_password,
-                sslmode=db_sslmode,
-            )
+            save_kwargs: dict = {
+                "name": profile_name,
+                "db_type": selected_db_type,
+                "host": db_host,
+                "port": int(db_port),
+                "user": db_user,
+                "password": db_password,
+            }
+            if selected_db_type == DB_TYPE_POSTGRESQL:
+                save_kwargs["database"] = db_name
+                save_kwargs["sslmode"] = db_sslmode
+            else:
+                save_kwargs["service_name"] = db_service
+            profile_mgr.save_profile(**save_kwargs)
             st.success(f"Profile '{profile_name}' saved!")
             st.rerun()
 
@@ -178,12 +242,13 @@
 # ---------------------------------------------------------------------------
 # Main area
 # ---------------------------------------------------------------------------
-st.header("🐘 AI PostgreSQL Assistant")
+st.header("🛢️ AI Database Assistant")
 
 if st.session_state.db_client and st.session_state.db_client.is_connected:
+    db_label = _connected_db_type().upper()
     st.info(
         f"Connected to **{st.session_state.db_client.get_connection_info()}** "
-        f"| Model: **{ollama_model}**"
+        f"({db_label}) | Model: **{ollama_model}**"
     )
 else:
     st.warning("Not connected to a database. Use the sidebar to connect.")
@@ -191,7 +256,9 @@
 # ---------------------------------------------------------------------------
 # Tabs
 # ---------------------------------------------------------------------------
-tab_query, tab_schema, tab_history = st.tabs(["💬 Query", "📋 Schema", "📜 History"])
+tab_query, tab_schema, tab_monitor, tab_analyse, tab_history = st.tabs(
+    ["💬 Query", "📋 Schema", "📡 Auto Monitor", "📊 Auto Analyse", "📜 History"]
+)
 
 # ---- Query tab ------------------------------------------------------------
 with tab_query:
@@ -325,6 +392,217 @@
     else:
         st.warning("Connect to a database first.")
 
+# ---- Auto Monitor tab -----------------------------------------------------
+with tab_monitor:
+    st.subheader("📡 Tablespace Auto Monitor")
+
+    if not (st.session_state.db_client and st.session_state.db_client.is_connected):
+        st.warning("Connect to a database first.")
+    else:
+        db_client = st.session_state.db_client
+
+        st.markdown(
+            "Periodically monitors tablespace usage and automatically extends "
+            "datafiles when usage exceeds the threshold (Oracle). "
+            "For PostgreSQL, reports storage metrics."
+        )
+
+        mcol1, mcol2, mcol3 = st.columns(3)
+        with mcol1:
+            mon_threshold = st.slider(
+                "Usage threshold (%)", 50, 99, 85, key="mon_threshold"
+            )
+        with mcol2:
+            mon_interval = st.selectbox(
+                "Check interval",
+                [60, 300, 900, 1800, 3600],
+                index=4,
+                format_func=lambda x: (
+                    f"{x // 60} min" if x < 3600 else f"{x // 3600} hr"
+                ),
+                key="mon_interval",
+            )
+        with mcol3:
+            mon_max_gb = st.number_input(
+                "Max file size (GB)", 1, 100, 20, key="mon_max_gb"
+            )
+
+        bcol1, bcol2, bcol3 = st.columns(3)
+        with bcol1:
+            if st.button(
+                "▶️ Start Auto Monitor", use_container_width=True, type="primary"
+            ):
+                # Stop the previous monitor, or its thread keeps running orphaned.
+                if st.session_state.monitor:
+                    st.session_state.monitor.stop()
+                monitor = TablespaceMonitor(
+                    db_client=db_client,
+                    threshold_pct=mon_threshold,
+                    max_file_size_gb=mon_max_gb,
+                    interval_sec=mon_interval,
+                )
+                monitor.start()
+                st.session_state.monitor = monitor
+                st.success("Monitor started!")
+        with bcol2:
+            if st.button("⏹️ Stop Monitor", use_container_width=True):
+                if st.session_state.monitor:
+                    st.session_state.monitor.stop()
+                    st.info("Monitor stopped.")
+        with bcol3:
+            if st.button("🔍 Check Now", use_container_width=True):
+                monitor = st.session_state.monitor
+                if not monitor:
+                    monitor = TablespaceMonitor(
+                        db_client=db_client,
+                        threshold_pct=mon_threshold,
+                        max_file_size_gb=mon_max_gb,
+                    )
+                    st.session_state.monitor = monitor
+                with st.spinner("Checking tablespace usage..."):
+                    monitor.run_check()  # result is read back from monitor.events
+                st.success("Check complete!")
+
+        if st.session_state.monitor and st.session_state.monitor.running:
+            st.info(
+                f"Monitor is running (interval: {st.session_state.monitor.interval_sec}s, "
+                f"threshold: {st.session_state.monitor.threshold_pct}%)"
+            )
+
+        # Display the 20 most recent monitor events, newest first
+        monitor = st.session_state.monitor
+        if monitor and monitor.events:
+            st.divider()
+            st.subheader("Monitor Events")
+            for evt in reversed(monitor.events[-20:]):
+                status_icon = {"ok": "🟢", "warning": "🟡", "error": "🔴"}.get(
+                    evt["status"], "⚪"
+                )
+                with st.expander(
+                    f"{status_icon} {evt['timestamp']} - {evt['status'].upper()}"
+                ):
+                    if evt.get("error"):
+                        st.error(evt["error"])
+
+                    ts_data = evt.get("tablespace_data", [])
+                    display_rows = [
+                        r
+                        for r in ts_data
+                        if isinstance(r, dict) and "_section" not in r
+                    ]
+                    if display_rows:
+                        st.caption("Tablespace Usage")
+                        st.dataframe(
+                            pd.DataFrame(display_rows),
+                            use_container_width=True,
+                            hide_index=True,
+                        )
+
+                    for section_item in ts_data:
+                        if (
+                            isinstance(section_item, dict)
+                            and "_section" in section_item
+                        ):
+                            st.caption(section_item["_section"].title())
+                            sec_rows = section_item.get("rows", [])
+                            if sec_rows:
+                                st.dataframe(
+                                    pd.DataFrame(sec_rows),
+                                    use_container_width=True,
+                                    hide_index=True,
+                                )
+
+                    actions = evt.get("actions", [])
+                    if actions:
+                        st.caption("Actions Taken")
+                        for act in actions:
+                            act_name = act.get("action", "")
+                            succeeded = "added" in act_name or "enabled" in act_name
+                            act_icon = "✅" if succeeded else "❌"
+                            st.markdown(f"{act_icon} **{act_name}**")
+                            if act.get("sql"):
+                                st.code(act["sql"], language="sql")
+                            if act.get("error"):
+                                st.error(act["error"])
+
+# ---- Auto Analyse tab -----------------------------------------------------
+with tab_analyse:
+    st.subheader("📊 Performance Analysis")
+
+    if not (st.session_state.db_client and st.session_state.db_client.is_connected):
+        st.warning("Connect to a database first.")
+    elif not st.session_state.llm_client:
+        st.warning("Configure Ollama settings and connect first.")
+    else:
+        db_client = st.session_state.db_client
+        llm_client = st.session_state.llm_client
+        db_label = db_client.db_type.upper()
+
+        st.markdown(
+            f"Collects performance data from **{db_label}** "
+            f"({'AWR / V$ views' if db_client.db_type == DB_TYPE_ORACLE else 'pg_stat_statements / pg_stat_*'}) "
+            "and generates an AI-powered summary with action plan."
+        )
+
+        acol1, acol2 = st.columns(2)
+        with acol1:
+            if st.button("📈 Collect Data Only", use_container_width=True):
+                analyser = PerformanceAnalyser(
+                    db_client=db_client, llm_client=llm_client
+                )
+                with st.spinner("Collecting performance data..."):
+                    raw_data = analyser.collect_data()
+                st.session_state.analyser = analyser
+                st.session_state["_last_analysis"] = {
+                    "raw_data": raw_data,
+                    "analysis": None,  # no LLM text in data-only mode
+                }
+                st.success("Data collected!")
+
+        with acol2:
+            if st.button(
+                "🧠 Full Analysis (Data + LLM)",
+                use_container_width=True,
+                type="primary",
+            ):
+                analyser = PerformanceAnalyser(
+                    db_client=db_client, llm_client=llm_client
+                )
+                with st.spinner("Collecting data and running LLM analysis..."):
+                    result = analyser.analyse()
+                st.session_state.analyser = analyser
+                st.session_state["_last_analysis"] = result
+                st.success("Analysis complete!")
+
+        # Display the most recent result stored in session state, if any
+        last = st.session_state.get("_last_analysis")
+        if last:
+            st.divider()
+
+            if last.get("analysis"):  # skipped when only raw data was collected
+                st.subheader("AI Analysis & Action Plan")
+                st.markdown(last["analysis"])
+
+            raw = last.get("raw_data", {})
+            if raw:
+                st.divider()
+                st.subheader("Raw Performance Data")
+                for section_name, section_data in raw.items():
+                    if section_name == "db_type":  # metadata entry, not a section
+                        continue
+                    label = section_name.replace("_", " ").title()
+                    with st.expander(f"📊 {label}"):
+                        if isinstance(section_data, dict) and "error" in section_data:
+                            st.error(section_data["error"])
+                        elif isinstance(section_data, list) and section_data:
+                            st.dataframe(
+                                pd.DataFrame(section_data),
+                                use_container_width=True,
+                                hide_index=True,
+                            )
+                        else:  # empty list, or a value of an unexpected type
+                            st.info("No data available.")
+
 # ---- History tab ----------------------------------------------------------
 with tab_history:
     st.subheader("Query History")
diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
new file mode 100644
index 0000000..737a3a7
--- /dev/null
+++ b/tools/pg-assistant/auto_analyse.py
@@ -0,0 +1,280 @@
+"""Performance analysis for Oracle (AWR/V$) and PostgreSQL (pg_stat_statements)."""
+
+import logging
+from typing import Any
+
+from db_client import BaseDBClient, DB_TYPE_ORACLE, DB_TYPE_POSTGRESQL
+from llm_client import LLMClient
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Oracle V$ performance queries
+# ---------------------------------------------------------------------------
+_ORA_TOP_SQL = """
+    SELECT * FROM (
+        SELECT
+            sql_id,
+            plan_hash_value,
+            ROUND(elapsed_time / 1e6, 2) AS elapsed_sec,
+            executions,
+            buffer_gets,
+            disk_reads,
+            SUBSTR(sql_text, 1, 200) AS sql_text
+        FROM v$sql
+        ORDER BY elapsed_time DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_WAIT_EVENTS = """
+    SELECT * FROM (
+        SELECT
+            event,
+            total_waits,
+            ROUND(time_waited / 100, 2) AS time_waited_sec,
+            ROUND(average_wait / 100, 4) AS avg_wait_sec
+        FROM v$system_event
+        WHERE wait_class != 'Idle'
+        ORDER BY time_waited DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_SYS_STATS = """
+    SELECT name, value
+    FROM v$sysstat
+    WHERE name IN (
+        'db block gets', 'consistent gets', 'physical reads',
+        'redo size', 'sorts (memory)', 'sorts (disk)',
+        'rows processed', 'parse count (total)', 'parse count (hard)',
+        'execute count', 'user commits', 'user rollbacks'
+    )
+    ORDER BY name
+"""
+
+_ORA_SGA = """
+    SELECT name, ROUND(bytes / 1048576, 2) AS size_mb
+    FROM v$sgainfo
+    WHERE name IN (
+        'Fixed SGA Size', 'Redo Buffers', 'Buffer Cache Size',
+        'Shared Pool Size', 'Large Pool Size', 'Java Pool Size',
+        'Streams Pool Size', 'Maximum SGA Size'
+    )
+    ORDER BY name
+"""
+
+_ORA_TABLESPACE_IO = """
+    SELECT * FROM (
+        SELECT
+            ts.name AS tablespace_name,
+            SUM(fs.phyrds) AS physical_reads,
+            SUM(fs.phywrts) AS physical_writes,
+            ROUND(SUM(fs.readtim) / 100, 2) AS read_time_sec,
+            ROUND(SUM(fs.writetim) / 100, 2) AS write_time_sec
+        FROM v$filestat fs
+        JOIN v$datafile df ON fs.file# = df.file#
+        JOIN v$tablespace ts ON df.ts# = ts.ts#
+        GROUP BY ts.name
+        ORDER BY physical_reads + physical_writes DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+# ---------------------------------------------------------------------------
+# PostgreSQL performance queries
+# ---------------------------------------------------------------------------
+_PG_TOP_QUERIES = """
+    SELECT
+        queryid,
+        LEFT(query, 200) AS query_text,
+        calls,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
+        ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec,
+        rows,
+        shared_blks_hit,
+        shared_blks_read,
+        CASE WHEN shared_blks_hit + shared_blks_read > 0
+            THEN ROUND(
+                shared_blks_hit::numeric
+                / (shared_blks_hit + shared_blks_read) * 100, 2
+            )
+            ELSE 100
+        END AS cache_hit_pct
+    FROM pg_stat_statements
+    ORDER BY total_exec_time DESC
+    LIMIT 20
+"""
+
+_PG_TABLE_STATS = """
+    SELECT
+        schemaname, relname,
+        seq_scan, seq_tup_read,
+        idx_scan, idx_tup_fetch,
+        n_tup_ins, n_tup_upd, n_tup_del,
+        n_live_tup, n_dead_tup,
+        last_vacuum, last_autovacuum,
+        last_analyze, last_autoanalyze
+    FROM pg_stat_user_tables
+    ORDER BY seq_scan + COALESCE(idx_scan, 0) DESC
+    LIMIT 20
+"""
+
+_PG_DB_STATS = """
+    SELECT
+        datname,
+        numbackends,
+        xact_commit, xact_rollback,
+        blks_read, blks_hit,
+        CASE WHEN blks_hit + blks_read > 0
+            THEN ROUND(blks_hit::numeric / (blks_hit + blks_read) * 100, 2)
+            ELSE 100
+        END AS cache_hit_pct,
+        tup_returned, tup_fetched,
+        tup_inserted, tup_updated, tup_deleted,
+        temp_files, temp_bytes
+    FROM pg_stat_database
+    WHERE datname = current_database()
+"""
+
+_PG_BGWRITER = """
+    SELECT
+        checkpoints_timed, checkpoints_req,
+        buffers_checkpoint, buffers_clean, buffers_backend,
+        maxwritten_clean
+    FROM pg_stat_bgwriter
+"""
+
+_PG_UNUSED_INDEXES = """
+    SELECT
+        schemaname, relname, indexrelname,
+        idx_scan, idx_tup_read, idx_tup_fetch,
+        pg_relation_size(indexrelid) / 1048576 AS index_size_mb
+    FROM pg_stat_user_indexes
+    WHERE idx_scan = 0
+    ORDER BY pg_relation_size(indexrelid) DESC
+    LIMIT 20
+"""
+
+ANALYSIS_SYSTEM_PROMPT = (
+    "You are a senior database performance engineer. "
+    "Analyze the following database performance data and provide:\n"
+    "1. **Executive Summary** (2-3 sentences)\n"
+    "2. **Key Findings** (bullet list of important observations)\n"
+    "3. **Top Issues** (ranked by severity)\n"
+    "4. **Action Plan** (prioritized recommendations with specific SQL or steps)\n\n"
+    "Be concise and actionable. Use markdown formatting."
+)
+
+
+# ---------------------------------------------------------------------------
+# Analyser
+# ---------------------------------------------------------------------------
+class PerformanceAnalyser:
+    """Collects DB performance data and generates LLM-powered analysis.
+
+    Oracle and PostgreSQL use different query sets, but both are executed
+    by one shared runner so the collected sections have the same layout.
+    """
+
+    def __init__(self, db_client: BaseDBClient, llm_client: LLMClient) -> None:
+        self.db_client = db_client
+        self.llm_client = llm_client
+
+    def collect_data(self) -> dict[str, Any]:
+        """Collect raw performance data from the database."""
+        if self.db_client.db_type == DB_TYPE_ORACLE:
+            return self._collect_oracle()
+        return self._collect_postgresql()
+
+    def analyse(self) -> dict[str, Any]:
+        """Collect data, generate LLM analysis, and return everything.
+
+        Returns a dict with ``raw_data``, ``report_text`` and ``analysis``.
+        A ConnectionError or RuntimeError raised by the LLM call is turned
+        into a failure message stored under ``analysis`` instead of raised.
+        """
+        raw_data = self.collect_data()
+        report_text = self._format_report(raw_data)
+
+        try:
+            llm_response = self.llm_client.generate(
+                prompt=report_text,
+                system_prompt=ANALYSIS_SYSTEM_PROMPT,
+            )
+        except (ConnectionError, RuntimeError) as exc:
+            llm_response = f"LLM analysis failed: {exc}"
+
+        return {
+            "raw_data": raw_data,
+            "report_text": report_text,
+            "analysis": llm_response,
+        }
+
+    # -- Data collection -----------------------------------------------------
+
+    def _run_queries(self, queries: dict[str, str], db_type: str) -> dict[str, Any]:
+        """Run each named query and store its rows, or its error, by name."""
+        sections: dict[str, Any] = {}
+        for name, sql in queries.items():
+            result = self.db_client.execute_query(sql)
+            if "error" in result:
+                sections[name] = {"error": result["error"]}
+            else:
+                sections[name] = result.get("rows", [])
+        sections["db_type"] = db_type  # added last, after every section
+        return sections
+
+    def _collect_oracle(self) -> dict[str, Any]:
+        """Collect the Oracle V$ performance sections."""
+        queries = {
+            "top_sql": _ORA_TOP_SQL,
+            "wait_events": _ORA_WAIT_EVENTS,
+            "system_stats": _ORA_SYS_STATS,
+            "sga_info": _ORA_SGA,
+            "tablespace_io": _ORA_TABLESPACE_IO,
+        }
+        return self._run_queries(queries, DB_TYPE_ORACLE)
+
+    def _collect_postgresql(self) -> dict[str, Any]:
+        """Collect the PostgreSQL statistics sections."""
+        queries = {
+            "top_queries": _PG_TOP_QUERIES,
+            "table_stats": _PG_TABLE_STATS,
+            "database_stats": _PG_DB_STATS,
+            "bgwriter_stats": _PG_BGWRITER,
+            "unused_indexes": _PG_UNUSED_INDEXES,
+        }
+        return self._run_queries(queries, DB_TYPE_POSTGRESQL)
+
+    # -- Report formatting ---------------------------------------------------
+
+    def _format_report(self, data: dict[str, Any]) -> str:
+        """Format collected data into a human-readable report for the LLM."""
+        db_type = data.get("db_type", "unknown")
+        parts = [f"DATABASE PERFORMANCE REPORT ({db_type.upper()})\n{'=' * 60}\n"]
+
+        for section_name, section_data in data.items():
+            if section_name == "db_type":
+                continue
+            parts.append(f"\n--- {section_name.upper().replace('_', ' ')} ---")
+            if isinstance(section_data, dict) and "error" in section_data:
+                parts.append(f"  ERROR: {section_data['error']}")
+            elif not isinstance(section_data, list):
+                parts.append(f"  {section_data}")
+            elif not section_data:
+                parts.append("  (no data)")
+            else:
+                for i, row in enumerate(section_data[:15], start=1):
+                    parts.append(f"  [{i}] {_format_row(row)}")
+                if len(section_data) > 15:
+                    parts.append(f"  ... and {len(section_data) - 15} more rows")
+
+        return "\n".join(parts)
+
+
+def _format_row(row: dict[str, Any]) -> str:
+    """Render a row as comma-separated ``key=value`` pairs.
+
+    Columns whose value is ``None`` are omitted from the output.
+    """
+    return ", ".join(
+        f"{col}={val}" for col, val in row.items() if val is not None
+    )
diff --git a/tools/pg-assistant/auto_monitor.py b/tools/pg-assistant/auto_monitor.py
new file mode 100644
index 0000000..938ca67
--- /dev/null
+++ b/tools/pg-assistant/auto_monitor.py
@@ -0,0 +1,350 @@
+"""Tablespace monitoring with auto-extend support for Oracle and PostgreSQL."""
+
+import logging
+import os
+import threading
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+from db_client import BaseDBClient, DB_TYPE_ORACLE
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_THRESHOLD_PCT = 85
+DEFAULT_MAX_FILE_SIZE_GB = 20
+DEFAULT_INTERVAL_SEC = 3600  # 1 hour
+
+
+# ---------------------------------------------------------------------------
+# Oracle tablespace queries
+# ---------------------------------------------------------------------------
+_ORACLE_TABLESPACE_USAGE_SQL = """
+    SELECT
+        df.tablespace_name,
+        COUNT(df.file_id) AS file_count,
+        ROUND(SUM(df.bytes) / 1048576, 2) AS total_size_mb,
+        ROUND(NVL(SUM(fs.free_bytes), 0) / 1048576, 2) AS free_mb,
+        ROUND((SUM(df.bytes) - NVL(SUM(fs.free_bytes), 0)) / 1048576, 2) AS used_mb,
+        ROUND(
+            (SUM(df.bytes) - NVL(SUM(fs.free_bytes), 0)) / SUM(df.bytes) * 100, 2
+        ) AS used_pct
+    FROM dba_data_files df
+    LEFT JOIN (
+        SELECT file_id, SUM(bytes) AS free_bytes
+        FROM dba_free_space
+        GROUP BY file_id
+    ) fs ON df.file_id = fs.file_id
+    GROUP BY df.tablespace_name
+    ORDER BY used_pct DESC
+"""
+
+_ORACLE_DATAFILES_SQL = """
+    SELECT
+        file_id,
+        file_name,
+        tablespace_name,
+        ROUND(bytes / 1048576, 2) AS size_mb,
+        ROUND(maxbytes / 1048576, 2) AS max_size_mb,
+        autoextensible
+    FROM dba_data_files
+    WHERE tablespace_name = :ts_name
+    ORDER BY file_id
+"""
+
+# ---------------------------------------------------------------------------
+# PostgreSQL storage queries
+# ---------------------------------------------------------------------------
+_PG_DATABASE_SIZE_SQL = """
+    SELECT
+        datname AS database_name,
+        pg_database_size(datname) / 1048576 AS size_mb
+    FROM pg_database
+    WHERE datname NOT IN ('template0', 'template1')
+    ORDER BY size_mb DESC
+"""
+
+_PG_TABLE_SIZE_SQL = """
+    SELECT
+        schemaname,
+        tablename,
+        pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) / 1048576 AS total_size_mb,
+        pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) / 1048576 AS table_size_mb
+    FROM pg_tables
+    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
+    ORDER BY total_size_mb DESC
+    LIMIT 50
+"""
+
+_PG_TABLESPACE_SQL = """
+    SELECT
+        spcname AS tablespace_name,
+        pg_tablespace_location(oid) AS location,
+        pg_tablespace_size(oid) / 1048576 AS size_mb
+    FROM pg_tablespace
+    ORDER BY size_mb DESC
+"""
+
+
+# ---------------------------------------------------------------------------
+# Monitor event dataclass-like dict builder
+# ---------------------------------------------------------------------------
+def _event(
+    status: str,
+    tablespace_data: list[dict[str, Any]],
+    actions: list[dict[str, Any]],
+    error: str = "",
+) -> dict[str, Any]:
+    """Build a monitor event dict stamped with the current UTC time."""
+    record: dict[str, Any] = {"timestamp": datetime.now(timezone.utc).isoformat()}
+    record["status"] = status
+    record["tablespace_data"] = tablespace_data
+    record["actions"] = actions
+    record["error"] = error
+    return record
+
+
+# ---------------------------------------------------------------------------
+# Core monitor logic
+# ---------------------------------------------------------------------------
+class TablespaceMonitor:
+    """Monitors tablespace usage and auto-extends datafiles when needed."""
+
+    def __init__(
+        self,
+        db_client: BaseDBClient,
+        threshold_pct: float = DEFAULT_THRESHOLD_PCT,
+        max_file_size_gb: float = DEFAULT_MAX_FILE_SIZE_GB,
+        interval_sec: int = DEFAULT_INTERVAL_SEC,
+    ) -> None:
+        # Background-thread state; the thread itself is created by start().
+        self._stop = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+        self.running = False
+        self.events: list[dict[str, Any]] = []  # event dicts added by checks
+        # Monitoring configuration.
+        self.db_client, self.interval_sec = db_client, interval_sec
+        self.threshold_pct, self.max_file_size_gb = threshold_pct, max_file_size_gb
+        self.max_file_size_mb = max_file_size_gb * 1024
+
+    # -- public API ----------------------------------------------------------
+
+    def start(self) -> None:
+        """Launch the daemon thread that runs checks; no-op if already alive."""
+        if self._thread is not None and self._thread.is_alive():
+            return
+        self._stop.clear()
+        worker = threading.Thread(target=self._loop, daemon=True)
+        self._thread = worker
+        worker.start()
+        self.running = True
+        logger.info(
+            "Tablespace monitor started (interval=%ds, threshold=%s%%)",
+            self.interval_sec, self.threshold_pct,
+        )
+
+    def stop(self) -> None:
+        """Signal the monitoring loop to exit; does not wait for the thread."""
+        self._stop.set()  # wakes the loop if it is sleeping between checks
+        self.running = False
+        logger.info("Tablespace monitor stopped")
+
+    def run_check(self) -> dict[str, Any]:
+        """Perform one check now; any failure is recorded as an error event."""
+        try:
+            oracle = self.db_client.db_type == DB_TYPE_ORACLE
+            check = self._check_oracle if oracle else self._check_postgresql
+            return check()
+        except Exception as exc:
+            failure = _event("error", [], [], error=str(exc))
+            self.events.append(failure)
+            return failure
+
+    # -- background loop -----------------------------------------------------
+
+    def _loop(self) -> None:
+        while not self._stop.is_set():
+            try:
+                self.run_check()
+            except Exception as exc:
+                logger.error("Monitor check failed: %s", exc)
+            self._stop.wait(self.interval_sec)
+        self.running = False
+
+    # -- Oracle checks -------------------------------------------------------
+
+    def _check_oracle(self) -> dict[str, Any]:
+        result = self.db_client.execute_query(_ORACLE_TABLESPACE_USAGE_SQL)
+        if "error" in result:
+            event = _event("error", [], [], error=result["error"])
+            self.events.append(event)
+            return event
+
+        ts_data = result.get("rows", [])
+        actions: list[dict[str, Any]] = []
+
+        for ts in ts_data:
+            ts_name = ts.get("TABLESPACE_NAME") or ts.get("tablespace_name", "")
+            used_pct = float(ts.get("USED_PCT") or ts.get("used_pct", 0))
+
+            if used_pct >= self.threshold_pct:
+                ts_actions = self._auto_extend_oracle(ts_name)
+                actions.extend(ts_actions)
+
+        status = "warning" if actions else "ok"
+        event = _event(status, ts_data, actions)
+        self.events.append(event)
+        return event
+
+    def _auto_extend_oracle(self, tablespace_name: str) -> list[dict[str, Any]]:
+        """Attempt to auto-extend or add datafiles for an Oracle tablespace."""
+        actions: list[dict[str, Any]] = []
+
+        df_result = self.db_client.execute_query(
+            _ORACLE_DATAFILES_SQL.replace(":ts_name", f"'{tablespace_name}'")
+        )
+        if "error" in df_result:
+            actions.append(
+                {
+                    "tablespace": tablespace_name,
+                    "action": "error",
+                    "detail": f"Failed to query datafiles: {df_result['error']}",
+                }
+            )
+            return actions
+
+        datafiles = df_result.get("rows", [])
+        extended_any = False
+
+        for df in datafiles:
+            file_name = df.get("FILE_NAME") or df.get("file_name", "")
+            max_size_mb = float(df.get("MAX_SIZE_MB") or df.get("max_size_mb", 0))
+            autoext = df.get("AUTOEXTENSIBLE") or df.get("autoextensible", "NO")
+
+            if autoext == "YES" and max_size_mb >= self.max_file_size_mb:
+                continue
+
+            if autoext != "YES" or max_size_mb < self.max_file_size_mb:
+                max_mb = int(self.max_file_size_mb)
+                sql = (
+                    f"ALTER DATABASE DATAFILE '{file_name}' "
+                    f"AUTOEXTEND ON MAXSIZE {max_mb}M"
+                )
+                stmt_result = self.db_client.execute_statement(sql)
+                if stmt_result.get("success"):
+                    actions.append(
+                        {
+                            "tablespace": tablespace_name,
+                            "action": "autoextend_enabled",
+                            "file": file_name,
+                            "max_size_mb": max_mb,
+                            "sql": sql,
+                        }
+                    )
+                    extended_any = True
+                else:
+                    actions.append(
+                        {
+                            "tablespace": tablespace_name,
+                            "action": "autoextend_failed",
+                            "file": file_name,
+                            "error": stmt_result.get("error", "unknown"),
+                            "sql": sql,
+                        }
+                    )
+
+        if not extended_any:
+            add_sql = (
+                f"ALTER TABLESPACE {tablespace_name} ADD DATAFILE "
+                f"SIZE 1024M AUTOEXTEND ON MAXSIZE {int(self.max_file_size_mb)}M"
+            )
+            stmt_result = self.db_client.execute_statement(add_sql)
+            if stmt_result.get("success"):
+                actions.append(
+                    {
+                        "tablespace": tablespace_name,
+                        "action": "datafile_added",
+                        "sql": add_sql,
+                    }
+                )
+            else:
+                dir_path = self._derive_datafile_dir(datafiles)
+                if dir_path:
+                    new_name = os.path.join(
+                        dir_path,
+                        f"{tablespace_name.lower()}_auto_{len(datafiles) + 1:02d}.dbf",
+                    )
+                    add_sql2 = (
+                        f"ALTER TABLESPACE {tablespace_name} ADD DATAFILE "
+                        f"'{new_name}' SIZE 1024M AUTOEXTEND ON "
+                        f"MAXSIZE {int(self.max_file_size_mb)}M"
+                    )
+                    stmt_result2 = self.db_client.execute_statement(add_sql2)
+                    if stmt_result2.get("success"):
+                        actions.append(
+                            {
+                                "tablespace": tablespace_name,
+                                "action": "datafile_added",
+                                "file": new_name,
+                                "sql": add_sql2,
+                            }
+                        )
+                    else:
+                        actions.append(
+                            {
+                                "tablespace": tablespace_name,
+                                "action": "add_datafile_failed",
+                                "error": stmt_result2.get("error", "unknown"),
+                                "sql": add_sql2,
+                            }
+                        )
+                else:
+                    actions.append(
+                        {
+                            "tablespace": tablespace_name,
+                            "action": "add_datafile_failed",
+                            "error": stmt_result.get("error", "unknown"),
+                            "sql": add_sql,
+                        }
+                    )
+
+        return actions
+
+    @staticmethod
+    def _derive_datafile_dir(datafiles: list[dict[str, Any]]) -> str:
+        """Derive directory from existing datafiles for new file placement."""
+        for df in datafiles:
+            fname = df.get("FILE_NAME") or df.get("file_name", "")
+            if fname:
+                return os.path.dirname(fname)
+        return ""
+
+    # -- PostgreSQL checks ---------------------------------------------------
+
+    def _check_postgresql(self) -> dict[str, Any]:
+        ts_result = self.db_client.execute_query(_PG_TABLESPACE_SQL)
+        db_result = self.db_client.execute_query(_PG_DATABASE_SIZE_SQL)
+        tbl_result = self.db_client.execute_query(_PG_TABLE_SIZE_SQL)
+
+        ts_data: list[dict[str, Any]] = []
+        actions: list[dict[str, Any]] = []
+
+        if "error" not in ts_result:
+            ts_data.extend(ts_result.get("rows", []))
+        if "error" not in db_result:
+            ts_data.append({"_section": "databases", "rows": db_result.get("rows", [])})
+        if "error" not in tbl_result:
+            ts_data.append({"_section": "tables", "rows": tbl_result.get("rows", [])})
+
+        for err_result in (ts_result, db_result, tbl_result):
+            if "error" in err_result:
+                actions.append(
+                    {
+                        "action": "query_error",
+                        "error": err_result["error"],
+                    }
+                )
+
+        status = "ok" if not actions else "warning"
+        event = _event(status, ts_data, actions)
+        self.events.append(event)
+        return event
diff --git a/tools/pg-assistant/db_client.py b/tools/pg-assistant/db_client.py
index 8357f86..a110b7e 100644
--- a/tools/pg-assistant/db_client.py
+++ b/tools/pg-assistant/db_client.py
@@ -1,17 +1,71 @@
-"""Direct PostgreSQL database client using psycopg2."""
+"""Database client supporting PostgreSQL and Oracle connections."""
 
+import abc
 import logging
 import time
 from typing import Any, Optional
 
-import psycopg2
-import psycopg2.extras
-
 logger = logging.getLogger(__name__)
 
+DB_TYPE_POSTGRESQL = "postgresql"
+DB_TYPE_ORACLE = "oracle"
+SUPPORTED_DB_TYPES = (DB_TYPE_POSTGRESQL, DB_TYPE_ORACLE)
+
+# Conditional imports -- only the driver for the chosen DB type is required.
+try:
+    import psycopg2
+    import psycopg2.extras
+except ImportError:
+    psycopg2 = None  # type: ignore[assignment]
+
+try:
+    import oracledb
+except ImportError:
+    oracledb = None  # type: ignore[assignment]
+
 
-class DBClient:
-    """Client for direct PostgreSQL database connections."""
+# ---------------------------------------------------------------------------
+# Abstract base
+# ---------------------------------------------------------------------------
+class BaseDBClient(abc.ABC):
+    """Common interface for all database clients."""
+
+    @abc.abstractmethod
+    def connect(self) -> None: ...  # open the connection
+
+    @abc.abstractmethod
+    def disconnect(self) -> None: ...  # close the connection if one is open
+
+    @property
+    @abc.abstractmethod
+    def is_connected(self) -> bool: ...  # whether a usable connection exists
+
+    @property
+    @abc.abstractmethod
+    def db_type(self) -> str: ...  # one of SUPPORTED_DB_TYPES
+
+    @abc.abstractmethod
+    def execute_query(self, sql: str) -> dict[str, Any]:
+        """Execute a SELECT query and return columns/rows/row_count/elapsed_ms."""
+
+    @abc.abstractmethod
+    def execute_statement(self, sql: str) -> dict[str, Any]:
+        """Execute DDL/DML (no result set). Returns success/error/elapsed_ms."""
+
+    @abc.abstractmethod  # implementations return {table: [column dicts]} or None
+    def get_schema(
+        self, schema_name: str = ""
+    ) -> Optional[dict[str, list[dict[str, str]]]]: ...
+
+    @abc.abstractmethod
+    def get_connection_info(self) -> str: ...  # display string for the connection
+
+
+# ---------------------------------------------------------------------------
+# PostgreSQL
+# ---------------------------------------------------------------------------
+class PostgreSQLClient(BaseDBClient):
+    """Client for PostgreSQL via psycopg2."""
 
     def __init__(
         self,
@@ -22,7 +76,12 @@ def __init__(
         password: str,
         sslmode: str = "prefer",
     ) -> None:
-        self.conn_params = {
+        if psycopg2 is None:
+            raise ImportError(
+                "psycopg2 is required for PostgreSQL connections. "
+                "Install it with: pip install psycopg2-binary"
+            )
+        self.conn_params: dict[str, Any] = {
             "host": host,
             "port": port,
             "dbname": database,
@@ -30,14 +89,13 @@ def __init__(
             "password": password,
             "sslmode": sslmode,
         }
-        self._conn: Optional[psycopg2.extensions.connection] = None
+        self._conn: Any = None
 
-    def connect(self) -> None:
-        """Establish a connection to PostgreSQL.
+    @property
+    def db_type(self) -> str:
+        return DB_TYPE_POSTGRESQL
 
-        Raises:
-            ConnectionError: If the database is unreachable.
-        """
+    def connect(self) -> None:
         try:
             self._conn = psycopg2.connect(**self.conn_params)
             self._conn.autocommit = True
@@ -48,43 +106,27 @@ def connect(self) -> None:
                 self.conn_params["dbname"],
             )
         except psycopg2.OperationalError as exc:
-            logger.error("Failed to connect to PostgreSQL: %s", exc)
             raise ConnectionError(f"Cannot connect to PostgreSQL: {exc}") from exc
 
     def disconnect(self) -> None:
-        """Close the database connection."""
         if self._conn and not self._conn.closed:
             self._conn.close()
             logger.info("Disconnected from PostgreSQL")
 
     @property
     def is_connected(self) -> bool:
-        """Check whether the connection is active."""
         if self._conn is None or self._conn.closed:
             return False
         try:
             with self._conn.cursor() as cur:
                 cur.execute("SELECT 1")
             return True
-        except psycopg2.Error:
+        except Exception:
             return False
 
     def execute_query(self, sql: str) -> dict[str, Any]:
-        """Execute a SQL query and return results.
-
-        Args:
-            sql: The SQL query string to execute.
-
-        Returns:
-            A dict with 'columns', 'rows', 'row_count', and 'elapsed_ms'.
-
-        Raises:
-            ConnectionError: If not connected to the database.
-            RuntimeError: If the query fails.
-        """
         if not self.is_connected:
             raise ConnectionError("Not connected to PostgreSQL. Please connect first.")
-
         start = time.monotonic()
         try:
             with self._conn.cursor(
@@ -102,10 +144,25 @@ def execute_query(self, sql: str) -> dict[str, Any]:
                     "row_count": len(rows),
                     "elapsed_ms": round(elapsed * 1000, 2),
                 }
-        except psycopg2.Error as exc:
+        except Exception as exc:
             elapsed = time.monotonic() - start
             logger.error("Query execution failed: %s", exc)
+            return {"error": str(exc).strip(), "elapsed_ms": round(elapsed * 1000, 2)}
+
+    def execute_statement(self, sql: str) -> dict[str, Any]:
+        if not self.is_connected:
+            raise ConnectionError("Not connected to PostgreSQL. Please connect first.")
+        start = time.monotonic()
+        try:
+            with self._conn.cursor() as cur:
+                cur.execute(sql)
+            elapsed = time.monotonic() - start
+            return {"success": True, "elapsed_ms": round(elapsed * 1000, 2)}
+        except Exception as exc:
+            elapsed = time.monotonic() - start
+            logger.error("Statement execution failed: %s", exc)
             return {
+                "success": False,
                 "error": str(exc).strip(),
                 "elapsed_ms": round(elapsed * 1000, 2),
             }
@@ -113,15 +170,6 @@ def execute_query(self, sql: str) -> dict[str, Any]:
     def get_schema(
         self, schema_name: str = "public"
     ) -> Optional[dict[str, list[dict[str, str]]]]:
-        """Retrieve database schema metadata.
-
-        Args:
-            schema_name: The PostgreSQL schema to inspect.
-
-        Returns:
-            Schema metadata dict mapping table names to column info lists,
-            or None on failure.
-        """
         sql = """
             SELECT
                 t.table_name,
@@ -139,14 +187,13 @@ def get_schema(
         """
         if not self.is_connected:
             return None
-
         try:
             with self._conn.cursor(
                 cursor_factory=psycopg2.extras.RealDictCursor
             ) as cur:
                 cur.execute(sql, (schema_name,))
                 rows = cur.fetchall()
-        except psycopg2.Error as exc:
+        except Exception as exc:
             logger.warning("Failed to fetch schema: %s", exc)
             return None
 
@@ -159,16 +206,187 @@ def get_schema(
                 "is_nullable": row["is_nullable"],
                 "column_default": row["column_default"] or "",
             }
-            if table not in schema:
-                schema[table] = []
-            schema[table].append(col_info)
+            schema.setdefault(table, []).append(col_info)
+        return schema
+
+    def get_connection_info(self) -> str:
+        p = self.conn_params
+        return f"{p['user']}@{p['host']}:{p['port']}/{p['dbname']}"
+
+
+# ---------------------------------------------------------------------------
+# Oracle
+# ---------------------------------------------------------------------------
+class OracleClient(BaseDBClient):
+    """Client for Oracle via python-oracledb (thin mode, no Oracle Client needed)."""
+
+    def __init__(
+        self,
+        host: str,
+        port: int,
+        service_name: str,
+        user: str,
+        password: str,
+    ) -> None:
+        """Store connection settings; raise ImportError if oracledb is missing."""
+        if oracledb is None:
+            raise ImportError(
+                "oracledb is required for Oracle connections. "
+                "Install it with: pip install oracledb"
+            )
+        self._host = host
+        self._port = port
+        self._service_name = service_name
+        self._user = user
+        self._password = password
+        self._dsn = f"{host}:{port}/{service_name}"
+        self._conn: Any = None
+
+    @property
+    def db_type(self) -> str:
+        return DB_TYPE_ORACLE
 
+    def connect(self) -> None:
+        """Open the connection; raise ConnectionError if oracledb.connect fails."""
+        try:
+            self._conn = oracledb.connect(
+                user=self._user, password=self._password, dsn=self._dsn
+            )
+            logger.info("Connected to Oracle at %s", self._dsn)
+        except oracledb.Error as exc:
+            raise ConnectionError(f"Cannot connect to Oracle: {exc}") from exc
+
+    def disconnect(self) -> None:
+        """Close the connection (best effort) and always drop the handle."""
+        if self._conn is not None:
+            try:
+                self._conn.close()
+                logger.info("Disconnected from Oracle")
+            except Exception as exc:
+                logger.debug("Ignoring error closing Oracle connection: %s", exc)
+            self._conn = None
+
+    @property
+    def is_connected(self) -> bool:
+        """True if a probe query (SELECT 1 FROM DUAL) succeeds on the connection."""
+        if self._conn is None:
+            return False
+        try:
+            with self._conn.cursor() as cur:
+                cur.execute("SELECT 1 FROM DUAL")
+            return True
+        except Exception:
+            return False
+
+    def execute_query(self, sql: str) -> dict[str, Any]:
+        """Run a query; rows are dicts keyed by column name, errors are returned."""
+        if not self.is_connected:
+            raise ConnectionError("Not connected to Oracle. Please connect first.")
+        start = time.monotonic()
+        try:
+            with self._conn.cursor() as cur:
+                cur.execute(sql)
+                # description is empty/None when there is no result set.
+                columns = [desc[0] for desc in cur.description or ()]
+                raw_rows = cur.fetchall() if columns else []
+            elapsed = time.monotonic() - start
+            rows = [dict(zip(columns, r)) for r in raw_rows]
+            return {
+                "columns": columns,
+                "rows": rows,
+                "row_count": len(rows),
+                "elapsed_ms": round(elapsed * 1000, 2),
+            }
+        except Exception as exc:
+            elapsed = time.monotonic() - start
+            logger.error("Query execution failed: %s", exc)
+            return {"error": str(exc).strip(), "elapsed_ms": round(elapsed * 1000, 2)}
+
+    def execute_statement(self, sql: str) -> dict[str, Any]:
+        """Run DDL/DML and commit; failures come back as success=False + error."""
+        if not self.is_connected:
+            raise ConnectionError("Not connected to Oracle. Please connect first.")
+        start = time.monotonic()
+        try:
+            with self._conn.cursor() as cur:
+                cur.execute(sql)
+            self._conn.commit()
+            elapsed = time.monotonic() - start
+            return {"success": True, "elapsed_ms": round(elapsed * 1000, 2)}
+        except Exception as exc:
+            elapsed = time.monotonic() - start
+            logger.error("Statement execution failed: %s", exc)
+            return {
+                "success": False,
+                "error": str(exc).strip(),
+                "elapsed_ms": round(elapsed * 1000, 2),
+            }
+
+    def get_schema(
+        self, schema_name: str = ""
+    ) -> Optional[dict[str, list[dict[str, str]]]]:
+        """Column info per table for an owner (default: upper-cased user), or None."""
+        if not schema_name:
+            schema_name = self._user.upper()
+        sql = """
+            SELECT table_name, column_name, data_type, nullable, data_default
+            FROM all_tab_columns
+            WHERE owner = :owner
+            ORDER BY table_name, column_id
+        """
+        if not self.is_connected:
+            return None
+        try:
+            with self._conn.cursor() as cur:
+                cur.execute(sql, {"owner": schema_name})
+                raw_rows = cur.fetchall()
+        except Exception as exc:
+            logger.warning("Failed to fetch Oracle schema: %s", exc)
+            return None
+
+        schema: dict[str, list[dict[str, str]]] = {}
+        for row in raw_rows:
+            table = row[0]
+            col_info = {
+                "column_name": row[1],
+                "data_type": row[2],
+                "is_nullable": "YES" if row[3] == "Y" else "NO",
+                "column_default": str(row[4]) if row[4] else "",
+            }
+            schema.setdefault(table, []).append(col_info)
         return schema
 
     def get_connection_info(self) -> str:
-        """Return a display-friendly connection string (password masked)."""
-        return (
-            f"{self.conn_params['user']}@"
-            f"{self.conn_params['host']}:{self.conn_params['port']}/"
-            f"{self.conn_params['dbname']}"
+        return f"{self._user}@{self._dsn}"
+
+
+# ---------------------------------------------------------------------------
+# Factory
+# ---------------------------------------------------------------------------
+def create_db_client(db_type: str, **kwargs: Any) -> BaseDBClient:
+    """Create a database client for the given type (ValueError if unsupported).
+
+    Args:
+        db_type: One of 'postgresql' or 'oracle'.
+        **kwargs: host/port/user/password + database[, sslmode] or service_name.
+    """
+    if db_type == DB_TYPE_POSTGRESQL:
+        return PostgreSQLClient(
+            host=kwargs["host"],
+            port=kwargs["port"],
+            database=kwargs["database"],
+            user=kwargs["user"],
+            password=kwargs["password"],
+            sslmode=kwargs.get("sslmode", "prefer"),
+        )
+    if db_type == DB_TYPE_ORACLE:
+        return OracleClient(
+            host=kwargs["host"],
+            port=kwargs["port"],
+            service_name=kwargs["service_name"],
+            user=kwargs["user"],
+            password=kwargs["password"],
         )
+    raise ValueError(
+        f"Unsupported database type: {db_type!r}. Supported: {SUPPORTED_DB_TYPES}"
+    )
diff --git a/tools/pg-assistant/profile_manager.py b/tools/pg-assistant/profile_manager.py
index 456d65b..8a1c2b4 100644
--- a/tools/pg-assistant/profile_manager.py
+++ b/tools/pg-assistant/profile_manager.py
@@ -1,4 +1,4 @@
-"""Database connection profile manager — save and load profiles as JSON."""
+"""Database connection profile manager -- save and load profiles as JSON."""
 
 import json
 import logging
@@ -51,43 +51,42 @@ def list_profiles(self) -> list[str]:
         return list(self._load_all().keys())
 
     def get_profile(self, name: str) -> Optional[dict[str, Any]]:
-        """Retrieve a saved profile by name.
-
-        Args:
-            name: The profile name.
-
-        Returns:
-            A dict with connection parameters, or None if not found.
-        """
+        """Retrieve a saved profile by name."""
         profiles = self._load_all()
         return profiles.get(name)
 
     def save_profile(
         self,
         name: str,
+        db_type: str,
         host: str,
         port: int,
-        database: str,
         user: str,
         password: str,
+        database: str = "",
+        service_name: str = "",
         sslmode: str = "prefer",
     ) -> None:
         """Save a database connection profile.
 
         Args:
             name: A friendly name for the profile.
-            host: PostgreSQL host.
-            port: PostgreSQL port.
-            database: Database name.
+            db_type: 'postgresql' or 'oracle'.
+            host: Database host.
+            port: Database port.
             user: Database user.
             password: Database password.
-            sslmode: SSL mode (default: prefer).
+            database: Database name (PostgreSQL).
+            service_name: Service name (Oracle).
+            sslmode: SSL mode (PostgreSQL only, default: prefer).
         """
         profiles = self._load_all()
         profiles[name] = {
+            "db_type": db_type,
             "host": host,
             "port": port,
             "database": database,
+            "service_name": service_name,
             "user": user,
             "password": password,
             "sslmode": sslmode,
@@ -98,9 +97,6 @@ def save_profile(
     def delete_profile(self, name: str) -> bool:
         """Delete a saved profile.
 
-        Args:
-            name: The profile name to delete.
-
         Returns:
             True if deleted, False if not found.
         """
diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt
index 7ef45a6..8efff7e 100644
--- a/tools/pg-assistant/requirements.txt
+++ b/tools/pg-assistant/requirements.txt
@@ -1,4 +1,5 @@
 requests>=2.31.0,<3.0.0
 psycopg2-binary>=2.9.0,<3.0.0
+oracledb>=2.0.0,<3.0.0
 streamlit>=1.28.0,<2.0.0
 pandas>=2.0.0,<3.0.0
diff --git a/tools/pg-assistant/sql_generator.py b/tools/pg-assistant/sql_generator.py
index 42dd313..798df2f 100644
--- a/tools/pg-assistant/sql_generator.py
+++ b/tools/pg-assistant/sql_generator.py
@@ -8,7 +8,7 @@
 
 logger = logging.getLogger(__name__)
 
-SYSTEM_PROMPT = (
+_PG_SYSTEM_PROMPT = (
     "You are a PostgreSQL expert. You receive natural language questions about "
     "a PostgreSQL database and return ONLY valid SQL SELECT queries. "
     "Rules:\n"
@@ -21,6 +21,20 @@
     "-- CANNOT_GENERATE"
 )
 
+_ORA_SYSTEM_PROMPT = (
+    "You are an Oracle Database expert. You receive natural language questions about "
+    "an Oracle database and return ONLY valid SQL SELECT queries. "
+    "Rules:\n"
+    "- Return ONLY the SQL query, nothing else.\n"
+    "- Do NOT include explanations, comments, or markdown formatting.\n"
+    "- Do NOT use DROP, DELETE, TRUNCATE, UPDATE, INSERT, ALTER, CREATE, or GRANT.\n"
+    "- Only generate SELECT statements.\n"
+    "- Use Oracle SQL syntax (e.g. ROWNUM, FETCH FIRST, NVL, DUAL, etc.).\n"
+    "- Always terminate the query with a semicolon.\n"
+    "- If the question cannot be answered with a SELECT query, respond with: "
+    "-- CANNOT_GENERATE"
+)
+
 DANGEROUS_KEYWORDS = frozenset(
     {
         "DROP",
@@ -54,17 +68,21 @@ class SQLGenerator:
     def __init__(
         self,
         llm_client: LLMClient,
+        db_type: str = "postgresql",
         schema_metadata: Optional[dict[str, Any]] = None,
     ) -> None:
         self.llm_client = llm_client
+        self.db_type = db_type
         self.schema_metadata = schema_metadata
 
-    def update_schema(self, schema_metadata: dict[str, Any]) -> None:
-        """Update the schema metadata used for prompt context.
+    @property
+    def system_prompt(self) -> str:
+        if self.db_type == "oracle":
+            return _ORA_SYSTEM_PROMPT
+        return _PG_SYSTEM_PROMPT
 
-        Args:
-            schema_metadata: Dict mapping table names to column info lists.
-        """
+    def update_schema(self, schema_metadata: dict[str, Any]) -> None:
+        """Update the schema metadata used for prompt context."""
         self.schema_metadata = schema_metadata
         logger.info("Schema metadata updated: %d tables", len(schema_metadata))
 
@@ -99,7 +117,7 @@ def generate_sql(self, user_query: str) -> str:
             try:
                 raw_response = self.llm_client.generate(
                     prompt=retry_prompt,
-                    system_prompt=SYSTEM_PROMPT,
+                    system_prompt=self.system_prompt,
                 )
             except (ConnectionError, RuntimeError) as exc:
                 logger.error("LLM request failed: %s", exc)
@@ -130,18 +148,12 @@ def generate_sql(self, user_query: str) -> str:
         )
 
     def _build_prompt(self, user_query: str) -> str:
-        """Build the full prompt including schema context.
-
-        Args:
-            user_query: The natural language question.
-
-        Returns:
-            The complete prompt string.
-        """
+        """Build the full prompt including schema context."""
         parts = []
 
         if self.schema_metadata:
-            parts.append("Database schema:")
+            db_label = "Oracle" if self.db_type == "oracle" else "PostgreSQL"
+            parts.append(f"Database schema ({db_label}):")
             for table_name, columns in self.schema_metadata.items():
                 col_defs = []
                 for col in columns:
@@ -164,16 +176,7 @@ def _build_prompt(self, user_query: str) -> str:
 
     @staticmethod
     def _extract_sql(raw_response: str) -> str:
-        """Extract clean SQL from the LLM response.
-
-        Strips markdown code blocks, comments, and extra whitespace.
-
-        Args:
-            raw_response: The raw LLM output.
-
-        Returns:
-            A cleaned SQL string.
-        """
+        """Extract clean SQL from the LLM response."""
         text = raw_response.strip()
 
         # Remove markdown code fences
@@ -204,9 +207,6 @@ def _extract_sql(raw_response: str) -> str:
     def _validate_sql(sql: str) -> None:
         """Validate that the SQL is a safe SELECT query.
 
-        Args:
-            sql: The SQL query to validate.
-
         Raises:
             UnsafeSQLError: If the query contains dangerous keywords.
             ValueError: If the query is not a valid SELECT statement.

From 94e3973007c2c6f030e47ad3295eca8ca346bce0 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 02:55:05 +0000
Subject: [PATCH 05/19] Increase Ollama timeout to 300s and add configurable
 timeout slider in UI

- Default timeout increased from 120s to 300s (first model load is slow)
- Added timeout slider (60-600s) in Ollama Settings sidebar
- Improved timeout error message with troubleshooting hint
---
 tools/pg-assistant/app.py        | 7 ++++++-
 tools/pg-assistant/llm_client.py | 6 ++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index d830787..be9ce74 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -74,6 +74,9 @@ def _connected_db_type() -> str:
     st.subheader("🤖 Ollama Settings")
     ollama_url = st.text_input("Ollama URL", value="http://localhost:11434")
     ollama_model = st.text_input("Model", value="codellama")
+    ollama_timeout = st.slider(
+        "Request timeout (seconds)", 60, 600, 300, step=30, key="ollama_timeout"
+    )
 
     if st.button("Test Ollama Connection"):
         test_llm = LLMClient(base_url=ollama_url, model=ollama_model)
@@ -173,7 +176,9 @@ def _connected_db_type() -> str:
             db.connect()
             st.session_state.db_client = db
 
-            llm = LLMClient(base_url=ollama_url, model=ollama_model)
+            llm = LLMClient(
+                base_url=ollama_url, model=ollama_model, timeout=ollama_timeout
+            )
             st.session_state.llm_client = llm
             gen = SQLGenerator(llm_client=llm, db_type=selected_db_type)
             st.session_state.sql_generator = gen
diff --git a/tools/pg-assistant/llm_client.py b/tools/pg-assistant/llm_client.py
index bb34846..4f3e2c8 100644
--- a/tools/pg-assistant/llm_client.py
+++ b/tools/pg-assistant/llm_client.py
@@ -10,7 +10,7 @@
 
 DEFAULT_OLLAMA_URL = "http://localhost:11434"
 DEFAULT_MODEL = "codellama"
-DEFAULT_TIMEOUT = 120
+DEFAULT_TIMEOUT = 300
 
 
 class LLMClient:
@@ -67,7 +67,9 @@ def generate(self, prompt: str, system_prompt: str = "") -> str:
         except requests.Timeout as exc:
             logger.error("Ollama request timed out after %ds", self.timeout)
             raise RuntimeError(
-                f"Ollama request timed out after {self.timeout}s."
+                f"Ollama request timed out after {self.timeout}s. "
+                "Try increasing the timeout in the sidebar settings, or ensure "
+                "the model is fully loaded (first request is slower)."
             ) from exc
 
         elapsed = time.monotonic() - start

From 3cd866190294e19d240ad26fe9bd2f9982d9e03f Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 03:26:29 +0000
Subject: [PATCH 06/19] Fix Oracle SQL compatibility and add auto-retry on DB
 errors

- Update Oracle system prompt to use ROWNUM instead of FETCH FIRST/OFFSET
  (compatible with Oracle 11g+, fixes ORA-00933)
- Increase MAX_RETRIES from 2 to 3 for SQL generation
- Add auto-retry in Query tab: when a query fails with a DB error, the
  error and the failing SQL are fed back to the LLM, which regenerates a
  corrected query once before the error is reported to the user
- Add explicit Oracle syntax guidance: NVL, DUAL, TO_DATE, and a subquery
  when combining ORDER BY with ROWNUM
---
 tools/pg-assistant/app.py           | 119 +++++++++++++++++-----------
 tools/pg-assistant/sql_generator.py |   7 +-
 2 files changed, 78 insertions(+), 48 deletions(-)

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index be9ce74..eb7ca9e 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -321,54 +321,81 @@ def _connected_db_type() -> str:
                     gen_elapsed = 0
 
             if sql:
-                st.subheader("Generated SQL")
-                st.code(sql, language="sql")
-                st.caption(f"Generated in {gen_elapsed:.2f}s")
-
-                with st.spinner("Executing query..."):
-                    result = db.execute_query(sql)
-
-                if "error" in result:
-                    st.error(f"**Query Error:** {result['error']}")
-                    st.session_state.query_history.append(
-                        {
-                            "question": user_question.strip(),
-                            "sql": sql,
-                            "status": "error",
-                            "error": result["error"],
-                            "elapsed_ms": result.get("elapsed_ms", 0),
-                        }
-                    )
-                else:
-                    rows = result.get("rows", [])
-                    row_count = result.get("row_count", 0)
-                    elapsed_ms = result.get("elapsed_ms", 0)
-
-                    st.subheader("Results")
-                    if rows:
-                        df = pd.DataFrame(rows)
-                        st.dataframe(df, use_container_width=True)
-                        st.caption(f"{row_count} row(s) returned in {elapsed_ms}ms")
-
-                        csv = df.to_csv(index=False)
-                        st.download_button(
-                            "📥 Download CSV",
-                            csv,
-                            file_name="query_results.csv",
-                            mime="text/csv",
+                max_exec_retries = 2
+                for exec_attempt in range(1, max_exec_retries + 1):
+                    st.subheader("Generated SQL")
+                    st.code(sql, language="sql")
+                    st.caption(f"Generated in {gen_elapsed:.2f}s")
+
+                    with st.spinner("Executing query..."):
+                        result = db.execute_query(sql)
+
+                    if "error" in result and exec_attempt < max_exec_retries:
+                        db_error = result["error"]
+                        st.warning(
+                            f"**Query failed** (attempt {exec_attempt}): {db_error}\n\n"
+                            "Regenerating SQL with error feedback..."
+                        )
+                        with st.spinner("Regenerating SQL with error context..."):
+                            retry_start = time.monotonic()
+                            try:
+                                sql = generator.generate_sql(
+                                    f"{user_question.strip()}\n\n"
+                                    f"IMPORTANT: The previous SQL failed with this "
+                                    f"database error: {db_error}\n"
+                                    f"Previous failing SQL: {sql}\n"
+                                    f"Please generate a corrected query that avoids "
+                                    f"this error."
+                                )
+                                gen_elapsed = time.monotonic() - retry_start
+                            except (UnsafeSQLError, SQLGenerationError) as exc:
+                                st.error(f"**Retry failed:** {exc}")
+                                sql = None
+                                break
+                        continue
+
+                    if "error" in result:
+                        st.error(f"**Query Error:** {result['error']}")
+                        st.session_state.query_history.append(
+                            {
+                                "question": user_question.strip(),
+                                "sql": sql,
+                                "status": "error",
+                                "error": result["error"],
+                                "elapsed_ms": result.get("elapsed_ms", 0),
+                            }
                         )
                     else:
-                        st.info("Query returned no results.")
-
-                    st.session_state.query_history.append(
-                        {
-                            "question": user_question.strip(),
-                            "sql": sql,
-                            "status": "success",
-                            "row_count": row_count,
-                            "elapsed_ms": elapsed_ms,
-                        }
-                    )
+                        rows = result.get("rows", [])
+                        row_count = result.get("row_count", 0)
+                        elapsed_ms = result.get("elapsed_ms", 0)
+
+                        st.subheader("Results")
+                        if rows:
+                            df = pd.DataFrame(rows)
+                            st.dataframe(df, use_container_width=True)
+                            st.caption(f"{row_count} row(s) returned in {elapsed_ms}ms")
+
+                            csv = df.to_csv(index=False)
+                            st.download_button(
+                                "📥 Download CSV",
+                                csv,
+                                file_name="query_results.csv",
+                                mime="text/csv",
+                            )
+                        else:
+                            st.info("Query returned no results.")
+
+                        st.session_state.query_history.append(
+                            {
+                                "question": user_question.strip(),
+                                "sql": sql,
+                                "status": "success",
+                                "row_count": row_count,
+                                "elapsed_ms": elapsed_ms,
+                            }
+                        )
+                    break
 
 # ---- Schema tab -----------------------------------------------------------
 with tab_schema:
diff --git a/tools/pg-assistant/sql_generator.py b/tools/pg-assistant/sql_generator.py
index 798df2f..00985af 100644
--- a/tools/pg-assistant/sql_generator.py
+++ b/tools/pg-assistant/sql_generator.py
@@ -29,7 +29,10 @@
     "- Do NOT include explanations, comments, or markdown formatting.\n"
     "- Do NOT use DROP, DELETE, TRUNCATE, UPDATE, INSERT, ALTER, CREATE, or GRANT.\n"
     "- Only generate SELECT statements.\n"
-    "- Use Oracle SQL syntax (e.g. ROWNUM, FETCH FIRST, NVL, DUAL, etc.).\n"
+    "- Use classic Oracle SQL syntax compatible with Oracle 11g and above.\n"
+    "- For limiting rows, ALWAYS use WHERE ROWNUM <= N (wrap in a subquery if "
+    "ordering is needed). NEVER use FETCH FIRST or OFFSET/FETCH.\n"
+    "- Use NVL instead of COALESCE, DUAL for dummy selects, TO_DATE for dates.\n"
     "- Always terminate the query with a semicolon.\n"
     "- If the question cannot be answered with a SELECT query, respond with: "
     "-- CANNOT_GENERATE"
@@ -51,7 +54,7 @@
     }
 )
 
-MAX_RETRIES = 2
+MAX_RETRIES = 3
 
 
 class SQLGenerationError(Exception):

From 3b30d6499e881989d70f7903ba10ad06661dc870 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 5 Apr 2026 03:39:21 +0000
Subject: [PATCH 07/19] Add snap ID selectors, report upload, and
 pg_stat_statements analysis

- Oracle: AWR snap ID range selector (lists snapshots from DBA_HIST_SNAPSHOT,
  then collects DBA_HIST_SQLSTAT, DBA_HIST_SYSTEM_EVENT and DBA_HIST_SYSSTAT
  data for the selected range)
- PostgreSQL: pgProfile sample ID range selector (lists samples from
  profile.samples, then collects profile.stmt_list and
  profile.wait_sampling_total data for the selected range)
- PostgreSQL: latest pg_stat_statements one-click analysis with extension check
- Both: file upload for AWR HTML/text, pg_stat_statements CSV, pgProfile reports
- Auto Analyse tab now has radio button mode selector per DB type
- Parsed report text shown in expander when no raw data available
---
 tools/pg-assistant/app.py          | 262 ++++++++++++++++++---
 tools/pg-assistant/auto_analyse.py | 353 ++++++++++++++++++++++++++++-
 2 files changed, 585 insertions(+), 30 deletions(-)

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index eb7ca9e..7073f9d 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -569,44 +569,248 @@ def _connected_db_type() -> str:
         db_client = st.session_state.db_client
         llm_client = st.session_state.llm_client
         db_label = db_client.db_type.upper()
+        is_oracle = db_client.db_type == DB_TYPE_ORACLE
 
         st.markdown(
             f"Collects performance data from **{db_label}** "
-            f"({'AWR / V$ views' if db_client.db_type == DB_TYPE_ORACLE else 'pg_stat_statements / pg_stat_*'}) "
+            f"({'AWR / V$ views' if is_oracle else 'pg_stat_statements / pg_stat_* / pgProfile'}) "
             "and generates an AI-powered summary with action plan."
         )
 
-        acol1, acol2 = st.columns(2)
-        with acol1:
-            if st.button("📈 Collect Data Only", use_container_width=True):
-                analyser = PerformanceAnalyser(
-                    db_client=db_client, llm_client=llm_client
+        # Analysis mode selector
+        if is_oracle:
+            analyse_mode = st.radio(
+                "Analysis mode",
+                [
+                    "Live V$ views",
+                    "AWR Snap ID range",
+                    "Upload report file",
+                ],
+                horizontal=True,
+                key="analyse_mode",
+            )
+        else:
+            analyse_mode = st.radio(
+                "Analysis mode",
+                [
+                    "Live pg_stat_* views",
+                    "pgProfile Snap ID range",
+                    "Latest pg_stat_statements",
+                    "Upload report file",
+                ],
+                horizontal=True,
+                key="analyse_mode",
+            )
+
+        st.divider()
+
+        # ------- Mode: Live V$ / pg_stat_* -----------------------------------
+        if analyse_mode in ("Live V$ views", "Live pg_stat_* views"):
+            acol1, acol2 = st.columns(2)
+            with acol1:
+                if st.button("📈 Collect Data Only", use_container_width=True):
+                    analyser = PerformanceAnalyser(
+                        db_client=db_client, llm_client=llm_client
+                    )
+                    with st.spinner("Collecting performance data..."):
+                        raw_data = analyser.collect_data()
+                    st.session_state.analyser = analyser
+                    st.session_state["_last_analysis"] = {
+                        "raw_data": raw_data,
+                        "analysis": None,
+                    }
+                    st.success("Data collected!")
+
+            with acol2:
+                if st.button(
+                    "🧠 Full Analysis (Data + LLM)",
+                    use_container_width=True,
+                    type="primary",
+                ):
+                    analyser = PerformanceAnalyser(
+                        db_client=db_client, llm_client=llm_client
+                    )
+                    with st.spinner("Collecting data and running LLM analysis..."):
+                        result = analyser.analyse()
+                    st.session_state.analyser = analyser
+                    st.session_state["_last_analysis"] = result
+                    st.success("Analysis complete!")
+
+        # ------- Mode: AWR Snap ID range (Oracle) ----------------------------
+        elif analyse_mode == "AWR Snap ID range":
+            analyser = PerformanceAnalyser(db_client=db_client, llm_client=llm_client)
+            st.markdown("Select an AWR snapshot range from `DBA_HIST_SNAPSHOT`.")
+
+            if st.button("🔄 Load AWR Snapshots"):
+                with st.spinner("Querying DBA_HIST_SNAPSHOT..."):
+                    snaps = analyser.list_awr_snapshots()
+                st.session_state["_awr_snapshots"] = snaps
+
+            snaps = st.session_state.get("_awr_snapshots", [])
+            if snaps:
+                snap_df = pd.DataFrame(snaps)
+                st.dataframe(
+                    snap_df, use_container_width=True, hide_index=True, height=250
+                )
+                snap_ids = [int(s["snap_id"]) for s in snaps]
+                scol1, scol2 = st.columns(2)
+                with scol1:
+                    begin_snap = st.selectbox(
+                        "Begin Snap ID",
+                        sorted(snap_ids),
+                        index=max(0, len(snap_ids) - 2),
+                        key="awr_begin",
+                    )
+                with scol2:
+                    end_snap = st.selectbox(
+                        "End Snap ID",
+                        sorted(snap_ids),
+                        index=len(snap_ids) - 1,
+                        key="awr_end",
+                    )
+
+                if st.button(
+                    "🧠 Analyse AWR Range",
+                    use_container_width=True,
+                    type="primary",
+                ):
+                    if begin_snap >= end_snap:
+                        st.error("Begin Snap ID must be less than End Snap ID.")
+                    else:
+                        with st.spinner(
+                            f"Collecting AWR data for snaps {begin_snap}–{end_snap}..."
+                        ):
+                            result = analyser.analyse_awr_snaps(begin_snap, end_snap)
+                        st.session_state.analyser = analyser
+                        st.session_state["_last_analysis"] = result
+                        st.success("AWR analysis complete!")
+            else:
+                st.info("Click 'Load AWR Snapshots' to list available snapshot IDs.")
+
+        # ------- Mode: pgProfile Snap ID range (PostgreSQL) ------------------
+        elif analyse_mode == "pgProfile Snap ID range":
+            analyser = PerformanceAnalyser(db_client=db_client, llm_client=llm_client)
+            st.markdown(
+                "Select a pgProfile sample range from `profile.samples`. "
+                "Requires the [pgProfile](https://github.com/zubkov-andrei/pg_profile) extension."
+            )
+
+            if st.button("🔄 Load pgProfile Samples"):
+                with st.spinner("Querying profile.samples..."):
+                    samples = analyser.list_pgprofile_samples()
+                if not samples:
+                    st.warning(
+                        "No pgProfile samples found. Is the pgProfile extension "
+                        "installed and configured?"
+                    )
+                st.session_state["_pgprofile_samples"] = samples
+
+            samples = st.session_state.get("_pgprofile_samples", [])
+            if samples:
+                samp_df = pd.DataFrame(samples)
+                st.dataframe(
+                    samp_df, use_container_width=True, hide_index=True, height=250
                 )
-                with st.spinner("Collecting performance data..."):
-                    raw_data = analyser.collect_data()
-                st.session_state.analyser = analyser
-                st.session_state["_last_analysis"] = {
-                    "raw_data": raw_data,
-                    "analysis": None,
-                }
-                st.success("Data collected!")
-
-        with acol2:
+                sample_ids = [int(s["sample_id"]) for s in samples]
+                pcol1, pcol2 = st.columns(2)
+                with pcol1:
+                    begin_sample = st.selectbox(
+                        "Begin Sample ID",
+                        sorted(sample_ids),
+                        index=max(0, len(sample_ids) - 2),
+                        key="pgp_begin",
+                    )
+                with pcol2:
+                    end_sample = st.selectbox(
+                        "End Sample ID",
+                        sorted(sample_ids),
+                        index=len(sample_ids) - 1,
+                        key="pgp_end",
+                    )
+
+                if st.button(
+                    "🧠 Analyse pgProfile Range",
+                    use_container_width=True,
+                    type="primary",
+                ):
+                    if begin_sample >= end_sample:
+                        st.error("Begin Sample ID must be less than End Sample ID.")
+                    else:
+                        with st.spinner(
+                            f"Collecting pgProfile data for samples "
+                            f"{begin_sample}–{end_sample}..."
+                        ):
+                            result = analyser.analyse_pgprofile_snaps(
+                                begin_sample, end_sample
+                            )
+                        st.session_state.analyser = analyser
+                        st.session_state["_last_analysis"] = result
+                        st.success("pgProfile analysis complete!")
+            else:
+                st.info("Click 'Load pgProfile Samples' to list available sample IDs.")
+
+        # ------- Mode: Latest pg_stat_statements (PostgreSQL) ----------------
+        elif analyse_mode == "Latest pg_stat_statements":
+            analyser = PerformanceAnalyser(db_client=db_client, llm_client=llm_client)
+            st.markdown(
+                "Collects the **latest cumulative snapshot** from "
+                "`pg_stat_statements` plus table, database, bgwriter stats "
+                "and unused indexes."
+            )
+
             if st.button(
-                "🧠 Full Analysis (Data + LLM)",
+                "🧠 Analyse Latest pg_stat_statements",
                 use_container_width=True,
                 type="primary",
             ):
-                analyser = PerformanceAnalyser(
-                    db_client=db_client, llm_client=llm_client
-                )
-                with st.spinner("Collecting data and running LLM analysis..."):
-                    result = analyser.analyse()
-                st.session_state.analyser = analyser
-                st.session_state["_last_analysis"] = result
-                st.success("Analysis complete!")
+                with st.spinner("Checking pg_stat_statements extension..."):
+                    has_ext = analyser.check_pg_stat_statements()
+                if not has_ext:
+                    st.error(
+                        "pg_stat_statements extension is not installed. "
+                        "Run `CREATE EXTENSION pg_stat_statements;` first."
+                    )
+                else:
+                    with st.spinner(
+                        "Collecting pg_stat_statements data and running LLM analysis..."
+                    ):
+                        result = analyser.analyse_pg_stat_latest()
+                    st.session_state.analyser = analyser
+                    st.session_state["_last_analysis"] = result
+                    st.success("pg_stat_statements analysis complete!")
+
+        # ------- Mode: Upload report file ------------------------------------
+        elif analyse_mode == "Upload report file":
+            st.markdown(
+                "Upload an **AWR report** (HTML/text), **pg_stat_statements CSV**, "
+                "or **pgProfile report** (HTML/text) for LLM-powered analysis."
+            )
+            uploaded_file = st.file_uploader(
+                "Choose a report file",
+                type=["html", "htm", "txt", "csv", "log"],
+                key="report_upload",
+            )
+            if uploaded_file is not None:
+                if st.button(
+                    "🧠 Analyse Uploaded Report",
+                    use_container_width=True,
+                    type="primary",
+                ):
+                    analyser = PerformanceAnalyser(
+                        db_client=db_client, llm_client=llm_client
+                    )
+                    file_content = uploaded_file.getvalue().decode(
+                        "utf-8", errors="replace"
+                    )
+                    with st.spinner(f"Parsing and analysing {uploaded_file.name}..."):
+                        result = analyser.analyse_uploaded_report(
+                            file_content, uploaded_file.name
+                        )
+                    st.session_state.analyser = analyser
+                    st.session_state["_last_analysis"] = result
+                    st.success("Report analysis complete!")
 
-        # Display analysis results
+        # ------- Display analysis results (shared across all modes) ----------
         last = st.session_state.get("_last_analysis")
         if last:
             st.divider()
@@ -620,7 +824,7 @@ def _connected_db_type() -> str:
                 st.divider()
                 st.subheader("Raw Performance Data")
                 for section_name, section_data in raw.items():
-                    if section_name == "db_type":
+                    if section_name in ("db_type", "snap_range", "sample_range"):
                         continue
                     label = section_name.replace("_", " ").title()
                     with st.expander(f"📊 {label}"):
@@ -635,6 +839,10 @@ def _connected_db_type() -> str:
                         else:
                             st.info("No data available.")
 
+            if last.get("report_text") and not raw:
+                with st.expander("📄 Parsed Report Text"):
+                    st.text(last["report_text"][:5000])
+
 # ---- History tab ----------------------------------------------------------
 with tab_history:
     st.subheader("Query History")
diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index 737a3a7..3411e25 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -1,6 +1,15 @@
-"""Performance analysis for Oracle (AWR/V$) and PostgreSQL (pg_stat_statements)."""
+"""Performance analysis for Oracle (AWR/V$) and PostgreSQL (pg_stat_statements).
 
+Supports three analysis modes:
+1. Live collection from V$/pg_stat_* views
+2. AWR snap-ID based report generation (Oracle)
+3. Uploaded report file parsing (AWR HTML/text, pg_stat_statements CSV, pgProfile)
+"""
+
+import csv
+import io
 import logging
+import re
 from typing import Any
 
 from db_client import BaseDBClient, DB_TYPE_ORACLE, DB_TYPE_POSTGRESQL
@@ -78,6 +87,121 @@
     ) WHERE ROWNUM <= 20
 """
 
+# ---------------------------------------------------------------------------
+# Oracle AWR snapshot queries
+# ---------------------------------------------------------------------------
+_ORA_LIST_SNAPSHOTS = """
+    SELECT
+        snap_id,
+        dbid,
+        instance_number,
+        TO_CHAR(begin_interval_time, 'YYYY-MM-DD HH24:MI') AS begin_time,
+        TO_CHAR(end_interval_time, 'YYYY-MM-DD HH24:MI') AS end_time
+    FROM dba_hist_snapshot
+    ORDER BY snap_id DESC
+"""
+
+_ORA_AWR_TOP_SQL = """
+    SELECT * FROM (
+        SELECT
+            s.sql_id,
+            s.plan_hash_value,
+            SUM(s.elapsed_time_delta) / 1e6 AS elapsed_sec,
+            SUM(s.executions_delta) AS executions,
+            SUM(s.buffer_gets_delta) AS buffer_gets,
+            SUM(s.disk_reads_delta) AS disk_reads,
+            DBMS_LOB.SUBSTR(t.sql_text, 200, 1) AS sql_text
+        FROM dba_hist_sqlstat s
+        JOIN dba_hist_sqltext t ON s.sql_id = t.sql_id AND s.dbid = t.dbid
+        WHERE s.snap_id BETWEEN :begin_snap AND :end_snap
+        GROUP BY s.sql_id, s.plan_hash_value,
+                 DBMS_LOB.SUBSTR(t.sql_text, 200, 1)
+        ORDER BY elapsed_sec DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_AWR_WAIT_EVENTS = """
+    SELECT * FROM (
+        SELECT
+            event_name AS event,
+            SUM(total_waits_fg) AS total_waits,
+            ROUND(SUM(time_waited_micro_fg) / 1e6, 2) AS time_waited_sec
+        FROM dba_hist_system_event
+        WHERE snap_id BETWEEN :begin_snap AND :end_snap
+          AND wait_class != 'Idle'
+        GROUP BY event_name
+        ORDER BY time_waited_sec DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_AWR_SYS_STATS = """
+    SELECT
+        stat_name AS name,
+        SUM(value) AS value
+    FROM dba_hist_sysstat
+    WHERE snap_id BETWEEN :begin_snap AND :end_snap
+      AND stat_name IN (
+        'db block gets', 'consistent gets', 'physical reads',
+        'redo size', 'sorts (memory)', 'sorts (disk)',
+        'rows processed', 'parse count (total)', 'parse count (hard)',
+        'execute count', 'user commits', 'user rollbacks'
+    )
+    GROUP BY stat_name
+    ORDER BY stat_name
+"""
+
+# ---------------------------------------------------------------------------
+# PostgreSQL pgProfile snapshot queries
+# ---------------------------------------------------------------------------
+_PG_LIST_PGPROFILE_SAMPLES = """
+    SELECT
+        sample_id,
+        sample_time::text AS sample_time,
+        server_name
+    FROM profile.samples
+    ORDER BY sample_id DESC
+    LIMIT 100
+"""
+
+_PG_PGPROFILE_TOP_SQL = """
+    SELECT
+        queryid,
+        LEFT(query, 200) AS query_text,
+        calls,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
+        ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec,
+        rows,
+        shared_blks_hit,
+        shared_blks_read
+    FROM profile.stmt_list sl
+    JOIN profile.sample_statements ss ON sl.queryid_md5 = ss.queryid_md5
+    WHERE ss.sample_id BETWEEN {begin_sample} AND {end_sample}
+    ORDER BY total_exec_time DESC
+    LIMIT 20
+"""
+
+_PG_PGPROFILE_WAIT_EVENTS = """
+    SELECT
+        event_type,
+        event,
+        SUM(tot_waited)::numeric AS total_waited_sec,
+        SUM(tot_waits) AS total_waits
+    FROM profile.wait_sampling_total
+    WHERE sample_id BETWEEN {begin_sample} AND {end_sample}
+    GROUP BY event_type, event
+    ORDER BY total_waited_sec DESC
+    LIMIT 20
+"""
+
+# ---------------------------------------------------------------------------
+# PostgreSQL pg_stat_statements snapshot (latest cumulative)
+# ---------------------------------------------------------------------------
+_PG_STAT_STATEMENTS_EXISTS = """
+    SELECT COUNT(*) AS cnt
+    FROM pg_extension
+    WHERE extname = 'pg_stat_statements'
+"""
+
 # ---------------------------------------------------------------------------
 # PostgreSQL performance queries
 # ---------------------------------------------------------------------------
@@ -178,6 +302,8 @@ def __init__(
         self.db_client = db_client
         self.llm_client = llm_client
 
+    # -- public API ----------------------------------------------------------
+
     def collect_data(self) -> dict[str, Any]:
         """Collect raw performance data from the database."""
         if self.db_client.db_type == DB_TYPE_ORACLE:
@@ -187,8 +313,58 @@ def collect_data(self) -> dict[str, Any]:
     def analyse(self) -> dict[str, Any]:
         """Collect data, generate LLM analysis, and return everything."""
         raw_data = self.collect_data()
-        report_text = self._format_report(raw_data)
+        return self._run_llm_analysis(raw_data)
+
+    def analyse_awr_snaps(self, begin_snap: int, end_snap: int) -> dict[str, Any]:
+        """Collect AWR data for a snap-ID range and generate LLM analysis."""
+        raw_data = self._collect_oracle_awr(begin_snap, end_snap)
+        return self._run_llm_analysis(raw_data)
+
+    def analyse_uploaded_report(
+        self, file_content: str, file_name: str
+    ) -> dict[str, Any]:
+        """Parse an uploaded report file and generate LLM analysis."""
+        parsed = parse_uploaded_report(file_content, file_name)
+        return self._run_llm_analysis_from_text(parsed)
+
+    def list_awr_snapshots(self) -> list[dict[str, Any]]:
+        """Return available AWR snapshots from DBA_HIST_SNAPSHOT."""
+        result = self.db_client.execute_query(_ORA_LIST_SNAPSHOTS)
+        if "error" in result:
+            return []
+        return result.get("rows", [])
 
+    def list_pgprofile_samples(self) -> list[dict[str, Any]]:
+        """Return available pgProfile samples from profile.samples."""
+        result = self.db_client.execute_query(_PG_LIST_PGPROFILE_SAMPLES)
+        if "error" in result:
+            return []
+        return result.get("rows", [])
+
+    def analyse_pgprofile_snaps(
+        self, begin_sample: int, end_sample: int
+    ) -> dict[str, Any]:
+        """Collect pgProfile data for a sample-ID range and run LLM analysis."""
+        raw_data = self._collect_pgprofile(begin_sample, end_sample)
+        return self._run_llm_analysis(raw_data)
+
+    def analyse_pg_stat_latest(self) -> dict[str, Any]:
+        """Collect latest pg_stat_statements data and run LLM analysis."""
+        raw_data = self._collect_postgresql()
+        return self._run_llm_analysis(raw_data)
+
+    def check_pg_stat_statements(self) -> bool:
+        """Check if pg_stat_statements extension is installed."""
+        result = self.db_client.execute_query(_PG_STAT_STATEMENTS_EXISTS)
+        if "error" in result:
+            return False
+        rows = result.get("rows", [])
+        return bool(rows and int(rows[0].get("cnt", 0)) > 0)
+
+    # -- internal helpers ----------------------------------------------------
+
+    def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
+        report_text = self._format_report(raw_data)
         try:
             llm_response = self.llm_client.generate(
                 prompt=report_text,
@@ -196,13 +372,26 @@ def analyse(self) -> dict[str, Any]:
             )
         except (ConnectionError, RuntimeError) as exc:
             llm_response = f"LLM analysis failed: {exc}"
-
         return {
             "raw_data": raw_data,
             "report_text": report_text,
             "analysis": llm_response,
         }
 
+    def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]:
+        try:
+            llm_response = self.llm_client.generate(
+                prompt=report_text,
+                system_prompt=ANALYSIS_SYSTEM_PROMPT,
+            )
+        except (ConnectionError, RuntimeError) as exc:
+            llm_response = f"LLM analysis failed: {exc}"
+        return {
+            "raw_data": {},
+            "report_text": report_text,
+            "analysis": llm_response,
+        }
+
     # -- Oracle collection ---------------------------------------------------
 
     def _collect_oracle(self) -> dict[str, Any]:
@@ -223,6 +412,51 @@ def _collect_oracle(self) -> dict[str, Any]:
         sections["db_type"] = DB_TYPE_ORACLE
         return sections
 
+    def _collect_oracle_awr(self, begin_snap: int, end_snap: int) -> dict[str, Any]:
+        """Collect AWR historical data between two snap IDs."""
+        sections: dict[str, Any] = {}
+        snap_range = {":begin_snap": str(begin_snap), ":end_snap": str(end_snap)}
+        queries = {
+            "awr_top_sql": _ORA_AWR_TOP_SQL,
+            "awr_wait_events": _ORA_AWR_WAIT_EVENTS,
+            "awr_system_stats": _ORA_AWR_SYS_STATS,
+        }
+        for name, sql in queries.items():
+            bound_sql = sql
+            for placeholder, val in snap_range.items():
+                bound_sql = bound_sql.replace(placeholder, val)
+            result = self.db_client.execute_query(bound_sql)
+            if "error" in result:
+                sections[name] = {"error": result["error"]}
+            else:
+                sections[name] = result.get("rows", [])
+        sections["db_type"] = DB_TYPE_ORACLE
+        sections["snap_range"] = f"{begin_snap} - {end_snap}"
+        return sections
+
+    # -- pgProfile collection ------------------------------------------------
+
+    def _collect_pgprofile(self, begin_sample: int, end_sample: int) -> dict[str, Any]:
+        """Collect pgProfile historical data between two sample IDs."""
+        sections: dict[str, Any] = {}
+        queries = {
+            "pgprofile_top_sql": _PG_PGPROFILE_TOP_SQL.format(
+                begin_sample=begin_sample, end_sample=end_sample
+            ),
+            "pgprofile_wait_events": _PG_PGPROFILE_WAIT_EVENTS.format(
+                begin_sample=begin_sample, end_sample=end_sample
+            ),
+        }
+        for name, sql in queries.items():
+            result = self.db_client.execute_query(sql)
+            if "error" in result:
+                sections[name] = {"error": result["error"]}
+            else:
+                sections[name] = result.get("rows", [])
+        sections["db_type"] = DB_TYPE_POSTGRESQL
+        sections["sample_range"] = f"{begin_sample} - {end_sample}"
+        return sections
+
     # -- PostgreSQL collection -----------------------------------------------
 
     def _collect_postgresql(self) -> dict[str, Any]:
@@ -278,3 +512,116 @@ def _format_row(row: dict[str, Any]) -> str:
             continue
         items.append(f"{k}={v}")
     return ", ".join(items)
+
+
+# ---------------------------------------------------------------------------
+# Report file parsing
+# ---------------------------------------------------------------------------
+def parse_uploaded_report(content: str, file_name: str) -> str:
+    """Parse an uploaded report file and return text suitable for LLM analysis.
+
+    Supported formats:
+    - AWR HTML report (Oracle)
+    - AWR text report (Oracle)
+    - pg_stat_statements CSV export
+    - pgProfile text/HTML report
+    - Plain text report
+    """
+    lower_name = file_name.lower()
+
+    if lower_name.endswith(".csv"):
+        return _parse_csv_report(content, file_name)
+    if lower_name.endswith((".html", ".htm")):
+        return _parse_html_report(content, file_name)
+    return _parse_text_report(content, file_name)
+
+
+def _parse_csv_report(content: str, file_name: str) -> str:
+    """Summarise a CSV export: header, row count, and the first 30 rows as text."""
+    parts = [f"UPLOADED REPORT: {file_name}\n{'=' * 60}\n"]
+    parts.append("Format: CSV (likely pg_stat_statements or similar export)\n")
+
+    reader = csv.DictReader(io.StringIO(content))
+    rows = list(reader)
+    if not rows:
+        parts.append("(empty CSV)")
+        return "\n".join(parts)
+
+    # Take names from the header: a row with surplus cells gains a None key
+    # (DictReader's restkey), and joining rows[0].keys() would raise TypeError.
+    parts.append(f"Columns: {', '.join(reader.fieldnames or [])}")
+    parts.append(f"Total rows: {len(rows)}\n")
+    for i, row in enumerate(rows[:30]):
+        parts.append(f"  [{i + 1}] {_format_row(row)}")
+    if len(rows) > 30:
+        parts.append(f"  ... and {len(rows) - 30} more rows")
+    return "\n".join(parts)
+
+
+def _parse_html_report(content: str, file_name: str) -> str:
+    """Flatten an HTML report (AWR or pgProfile) into plain text for the LLM."""
+    import html  # function-scope import: only this parser decodes entities
+    parts = [f"UPLOADED REPORT: {file_name}\n{'=' * 60}\n"]
+    if (
+        "AWR" in content[:2000].upper()
+        or "WORKLOAD REPOSITORY" in content[:2000].upper()
+    ):
+        parts.append("Format: Oracle AWR HTML Report\n")
+    elif (
+        "pgprofile" in content[:2000].lower() or "pg_profile" in content[:2000].lower()
+    ):
+        parts.append("Format: pgProfile HTML Report\n")
+    else:
+        parts.append("Format: HTML Report\n")
+
+    # Strip HTML tags to get text content
+    text = re.sub(
+        r"<style[^>]*>.*?</style>", "", content, flags=re.DOTALL | re.IGNORECASE
+    )
+    text = re.sub(
+        r"<script[^>]*>.*?</script>", "", text, flags=re.DOTALL | re.IGNORECASE
+    )
+    text = re.sub(r"<[^>]+>", " ", text)
+    # Decode all named and numeric entities (&quot;, &#39;, ...), not just four.
+    # Runs after tag stripping so decoded "<"/">" are never mistaken for markup;
+    # &nbsp; becomes U+00A0, which the \s+ collapse below folds into a space.
+    text = html.unescape(text)
+    text = re.sub(r"\s+", " ", text).strip()
+
+    # Truncate to a reasonable size for LLM context
+    max_chars = 15000
+    if len(text) > max_chars:
+        parts.append(text[:max_chars])
+        parts.append(f"\n... (truncated, {len(text)} total characters)")
+    else:
+        parts.append(text)
+
+    return "\n".join(parts)
+
+
+def _parse_text_report(content: str, file_name: str) -> str:
+    """Parse a plain text report (AWR text, pgProfile text, etc.)."""
+    parts = [f"UPLOADED REPORT: {file_name}\n{'=' * 60}\n"]
+
+    if (
+        "AWR" in content[:2000].upper()
+        or "WORKLOAD REPOSITORY" in content[:2000].upper()
+    ):
+        parts.append("Format: Oracle AWR Text Report\n")
+    elif (
+        "pgprofile" in content[:2000].lower() or "pg_profile" in content[:2000].lower()
+    ):
+        parts.append("Format: pgProfile Text Report\n")
+    elif "pg_stat_statements" in content[:2000].lower():
+        parts.append("Format: pg_stat_statements Report\n")
+    else:
+        parts.append("Format: Text Report\n")
+
+    max_chars = 15000
+    if len(content) > max_chars:
+        parts.append(content[:max_chars])
+        parts.append(f"\n... (truncated, {len(content)} total characters)")
+    else:
+        parts.append(content)
+
+    return "\n".join(parts)

From 8bf5c9e461c7e97f7ffece8ff785399a3b1745b1 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 07:31:58 +0000
Subject: [PATCH 08/19] Fix Oracle uppercase column names causing KeyError on
 snap_id

Oracle's oracledb driver returns column names in UPPERCASE by default.
Normalize to lowercase in OracleClient.execute_query() so all downstream
code (AWR snap selector, auto_analyse, etc.) can use lowercase keys
consistently.
---
 tools/pg-assistant/db_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/pg-assistant/db_client.py b/tools/pg-assistant/db_client.py
index a110b7e..385ce9e 100644
--- a/tools/pg-assistant/db_client.py
+++ b/tools/pg-assistant/db_client.py
@@ -282,7 +282,7 @@ def execute_query(self, sql: str) -> dict[str, Any]:
             with self._conn.cursor() as cur:
                 cur.execute(sql)
                 if cur.description:
-                    columns = [desc[0] for desc in cur.description]
+                    columns = [desc[0].lower() for desc in cur.description]
                     raw_rows = cur.fetchall()
                     elapsed = time.monotonic() - start
                     rows = [dict(zip(columns, r)) for r in raw_rows]

From 1f009b2879f45c73ffb01b9e36b1e9f4ef650554 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 07:50:28 +0000
Subject: [PATCH 09/19] Enhance analysis with SQL-ID-specific recommendations
 and rich metadata

- Oracle: collect top CPU SQL (v$sql by cpu_time), full table scans
  (v$sql_plan TABLE ACCESS FULL), existing indexes (all_indexes +
  all_ind_columns with LISTAGG), stale stats (all_tab_statistics),
  and execution plans (v$sql_plan detail for top 5 sql_ids)
- PostgreSQL: collect top CPU queries (pg_stat_statements with
  blk_read_time/temp_blks), seq scan tables (pg_stat_user_tables
  with avg rows per scan), existing indexes (pg_indexes with DDL),
  stale stats/vacuum (dead tuples, last_analyze), lock waits
  (pg_stat_activity)
- Rewrote LLM system prompt to require SQL-ID-specific analysis:
  high-CPU SQL with exact sql_id/queryid, full table scan tables
  with causing sql_id, missing index CREATE statements referencing
  the queryid that benefits, stale stats with ANALYZE/DBMS_STATS
  commands, unused index DROP statements, and numbered action plan
  with exact SQL commands and expected improvement
---
 tools/pg-assistant/auto_analyse.py | 285 +++++++++++++++++++++++++++--
 1 file changed, 273 insertions(+), 12 deletions(-)

diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index 3411e25..c266d60 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -87,6 +87,97 @@
     ) WHERE ROWNUM <= 20
 """
 
+_ORA_FULL_TABLE_SCANS = """
+    SELECT * FROM (
+        SELECT
+            p.sql_id,
+            p.plan_hash_value,
+            p.object_owner,
+            p.object_name AS table_name,
+            p.operation || ' ' || NVL(p.options, '') AS operation,
+            s.executions,
+            ROUND(s.elapsed_time / 1e6, 2) AS elapsed_sec,
+            s.buffer_gets,
+            s.disk_reads,
+            SUBSTR(s.sql_text, 1, 200) AS sql_text
+        FROM v$sql_plan p
+        JOIN v$sql s ON p.sql_id = s.sql_id
+            AND p.child_number = s.child_number
+        WHERE p.operation = 'TABLE ACCESS'
+            AND p.options = 'FULL'
+            AND p.object_owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN')
+        ORDER BY s.elapsed_time DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_TOP_CPU_SQL = """
+    SELECT * FROM (
+        SELECT
+            sql_id,
+            plan_hash_value,
+            ROUND(cpu_time / 1e6, 2) AS cpu_sec,
+            ROUND(elapsed_time / 1e6, 2) AS elapsed_sec,
+            executions,
+            buffer_gets,
+            ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec,
+            SUBSTR(sql_text, 1, 200) AS sql_text
+        FROM v$sql
+        WHERE cpu_time > 0
+        ORDER BY cpu_time DESC
+    ) WHERE ROWNUM <= 15
+"""
+
+_ORA_EXISTING_INDEXES = """
+    SELECT
+        i.table_name,
+        i.index_name,
+        i.index_type,
+        i.uniqueness,
+        i.status,
+        i.num_rows AS index_rows,
+        i.last_analyzed,
+        LISTAGG(c.column_name, ', ') WITHIN GROUP (ORDER BY c.column_position) AS columns
+    FROM all_indexes i
+    JOIN all_ind_columns c ON i.index_name = c.index_name AND i.owner = c.index_owner
+    WHERE i.owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'XDB', 'WMSYS')
+        AND i.table_owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'XDB', 'WMSYS')
+    GROUP BY i.table_name, i.index_name, i.index_type, i.uniqueness,
+             i.status, i.num_rows, i.last_analyzed
+    ORDER BY i.table_name, i.index_name
+"""
+
+# Tables with stale/missing optimizer stats; never-analyzed tables sort first.
+_ORA_STALE_STATS = """
+    SELECT
+        table_name,
+        num_rows,
+        TO_CHAR(last_analyzed, 'YYYY-MM-DD HH24:MI') AS last_analyzed,
+        stale_stats,
+        ROUND((SYSDATE - last_analyzed), 1) AS days_since_analyzed
+    FROM all_tab_statistics
+    WHERE owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN', 'XDB', 'WMSYS')
+        AND (stale_stats = 'YES' OR last_analyzed IS NULL
+             OR last_analyzed < SYSDATE - 7)
+    ORDER BY last_analyzed NULLS FIRST
+"""
+
+_ORA_SQL_PLAN_DETAIL = """
+    SELECT
+        sql_id,
+        plan_hash_value,
+        id AS step_id,
+        LPAD(' ', 2 * depth) || operation || ' ' || NVL(options, '') AS operation,
+        object_name,
+        ROUND(cost) AS cost,
+        cardinality AS est_rows,
+        bytes AS est_bytes,
+        access_predicates,
+        filter_predicates
+    FROM v$sql_plan
+    WHERE sql_id = '{sql_id}'
+    ORDER BY child_number, id
+"""
+
 # ---------------------------------------------------------------------------
 # Oracle AWR snapshot queries
 # ---------------------------------------------------------------------------
@@ -277,14 +368,132 @@
     LIMIT 20
 """
 
+_PG_SEQ_SCAN_TABLES = """
+    SELECT
+        schemaname, relname,
+        seq_scan,
+        seq_tup_read,
+        COALESCE(idx_scan, 0) AS idx_scan,
+        n_live_tup,
+        CASE WHEN seq_scan > 0 AND n_live_tup > 0
+            THEN ROUND(seq_tup_read::numeric / GREATEST(seq_scan, 1))
+            ELSE 0
+        END AS avg_rows_per_seq_scan,
+        pg_relation_size(relid) / 1048576 AS table_size_mb
+    FROM pg_stat_user_tables
+    WHERE seq_scan > 0
+        AND n_live_tup > 1000
+    ORDER BY seq_tup_read DESC
+    LIMIT 20
+"""
+
+_PG_EXISTING_INDEXES = """
+    SELECT
+        schemaname, tablename, indexname,
+        indexdef
+    FROM pg_indexes
+    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
+    ORDER BY tablename, indexname
+"""
+
+# Tables needing VACUUM/ANALYZE. Staleness uses the newer of manual and auto
+# analyze (GREATEST skips NULLs), so autoanalyzed tables are not flagged.
+_PG_STALE_STATS = """
+    SELECT
+        schemaname, relname,
+        n_live_tup,
+        n_dead_tup,
+        CASE WHEN n_live_tup > 0
+            THEN ROUND(n_dead_tup::numeric / n_live_tup * 100, 2)
+            ELSE 0
+        END AS dead_pct,
+        last_vacuum::text, last_autovacuum::text,
+        last_analyze::text, last_autoanalyze::text
+    FROM pg_stat_user_tables
+    WHERE n_dead_tup > 1000
+        OR GREATEST(last_analyze, last_autoanalyze) IS NULL
+        OR GREATEST(last_analyze, last_autoanalyze) < now() - interval '7 days'
+    ORDER BY n_dead_tup DESC
+    LIMIT 30
+"""
+
+_PG_TOP_CPU_QUERIES = """
+    SELECT
+        queryid,
+        LEFT(query, 300) AS query_text,
+        calls,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
+        ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec,
+        rows,
+        shared_blks_hit,
+        shared_blks_read,
+        CASE WHEN shared_blks_hit + shared_blks_read > 0
+            THEN ROUND(
+                shared_blks_hit::numeric
+                / (shared_blks_hit + shared_blks_read) * 100, 2
+            )
+            ELSE 100
+        END AS cache_hit_pct,
+        ROUND((blk_read_time / 1000)::numeric, 2) AS blk_read_sec,
+        ROUND((blk_write_time / 1000)::numeric, 2) AS blk_write_sec,
+        temp_blks_read,
+        temp_blks_written
+    FROM pg_stat_statements
+    ORDER BY total_exec_time DESC
+    LIMIT 15
+"""
+
+_PG_LOCK_WAITS = """
+    SELECT
+        pid,
+        usename,
+        LEFT(query, 200) AS query,
+        wait_event_type,
+        wait_event,
+        state,
+        ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 2) AS running_sec
+    FROM pg_stat_activity
+    WHERE state != 'idle'
+        AND wait_event IS NOT NULL
+    ORDER BY query_start
+    LIMIT 20
+"""
+
 ANALYSIS_SYSTEM_PROMPT = (
-    "You are a senior database performance engineer. "
-    "Analyze the following database performance data and provide:\n"
-    "1. **Executive Summary** (2-3 sentences)\n"
-    "2. **Key Findings** (bullet list of important observations)\n"
-    "3. **Top Issues** (ranked by severity)\n"
-    "4. **Action Plan** (prioritized recommendations with specific SQL or steps)\n\n"
-    "Be concise and actionable. Use markdown formatting."
+    "You are a senior DBA and database performance engineer performing a deep-dive "
+    "analysis. You have been given detailed performance data including SQL IDs/query IDs, "
+    "execution plans, full table scans, existing indexes, and stats freshness.\n\n"
+    "Produce the following sections:\n\n"
+    "## Executive Summary\n"
+    "2-3 sentences summarising the overall database health and biggest concern.\n\n"
+    "## High-CPU / Long-Running SQL\n"
+    "For EACH problematic SQL (reference the sql_id or queryid):\n"
+    "- Quote the sql_id / queryid and a snippet of the SQL text\n"
+    "- Explain WHY it is slow (full table scan, missing index, bad stats, etc.)\n"
+    "- Provide the EXACT fix SQL (CREATE INDEX, ANALYZE, rewrite, etc.)\n\n"
+    "## Full Table Scans\n"
+    "List every table being full-scanned with the sql_id causing it.\n"
+    "- For each, check the existing indexes section — if an index already exists "
+    "that should have been used, suggest gathering fresh stats or checking predicates.\n"
+    "- If no suitable index exists, provide the exact CREATE INDEX statement.\n\n"
+    "## Missing / Recommended Indexes\n"
+    "Based on the query patterns (WHERE, JOIN, ORDER BY columns visible in SQL text), "
+    "suggest specific CREATE INDEX statements. Reference the sql_id/queryid that "
+    "would benefit.\n\n"
+    "## Stale Statistics / Vacuum Issues\n"
+    "List tables with stale or missing stats. Provide exact ANALYZE / DBMS_STATS "
+    "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM.\n\n"
+    "## Unused Indexes\n"
+    "List indexes that have never been scanned and recommend dropping them "
+    "(provide DROP INDEX statements).\n\n"
+    "## Action Plan (Priority Order)\n"
+    "Numbered list of actions sorted by impact. Each action must include:\n"
+    "- The specific sql_id / queryid / table affected\n"
+    "- The exact SQL command to execute\n"
+    "- Expected improvement\n\n"
+    "IMPORTANT: Be SPECIFIC — always reference sql_id, queryid, or table name. "
+    "Never give generic advice like 'add indexes where needed'. "
+    "Use markdown formatting with code blocks for SQL."
 )
 
 
@@ -397,7 +606,11 @@ def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]:
     def _collect_oracle(self) -> dict[str, Any]:
         sections: dict[str, Any] = {}
         queries = {
-            "top_sql": _ORA_TOP_SQL,
+            "top_cpu_sql": _ORA_TOP_CPU_SQL,
+            "top_elapsed_sql": _ORA_TOP_SQL,
+            "full_table_scans": _ORA_FULL_TABLE_SCANS,
+            "existing_indexes": _ORA_EXISTING_INDEXES,
+            "stale_statistics": _ORA_STALE_STATS,
             "wait_events": _ORA_WAIT_EVENTS,
             "system_stats": _ORA_SYS_STATS,
             "sga_info": _ORA_SGA,
@@ -409,9 +622,37 @@ def _collect_oracle(self) -> dict[str, Any]:
                 sections[name] = {"error": result["error"]}
             else:
                 sections[name] = result.get("rows", [])
+
+        # Collect execution plans for top 5 SQL IDs
+        top_sql_ids = self._extract_oracle_sql_ids(sections)
+        plans: list[dict[str, Any]] = []
+        for sql_id in top_sql_ids[:5]:
+            plan_sql = _ORA_SQL_PLAN_DETAIL.format(sql_id=sql_id)
+            result = self.db_client.execute_query(plan_sql)
+            if "error" not in result:
+                rows = result.get("rows", [])
+                if rows:
+                    plans.append({"sql_id": sql_id, "steps": rows})
+        if plans:
+            sections["execution_plans"] = plans
+
         sections["db_type"] = DB_TYPE_ORACLE
         return sections
 
+    def _extract_oracle_sql_ids(self, sections: dict[str, Any]) -> list[str]:
+        """Collect unique sql_ids in first-seen order (CPU, elapsed, full-scan rows)."""
+        seen: set[str] = set()
+        ids: list[str] = []
+        for key in ("top_cpu_sql", "top_elapsed_sql", "full_table_scans"):
+            data = sections.get(key, [])
+            if isinstance(data, list):  # a failed section holds an error dict
+                for row in data:
+                    sid = row.get("sql_id", "")
+                    if sid and sid not in seen:
+                        seen.add(sid)
+                        ids.append(sid)
+        return ids
+
     def _collect_oracle_awr(self, begin_snap: int, end_snap: int) -> dict[str, Any]:
         """Collect AWR historical data between two snap IDs."""
         sections: dict[str, Any] = {}
@@ -462,11 +703,16 @@ def _collect_pgprofile(self, begin_sample: int, end_sample: int) -> dict[str, An
     def _collect_postgresql(self) -> dict[str, Any]:
         sections: dict[str, Any] = {}
         queries = {
+            "top_cpu_queries": _PG_TOP_CPU_QUERIES,
             "top_queries": _PG_TOP_QUERIES,
+            "seq_scan_tables": _PG_SEQ_SCAN_TABLES,
+            "existing_indexes": _PG_EXISTING_INDEXES,
+            "stale_stats_vacuum": _PG_STALE_STATS,
             "table_stats": _PG_TABLE_STATS,
             "database_stats": _PG_DB_STATS,
             "bgwriter_stats": _PG_BGWRITER,
             "unused_indexes": _PG_UNUSED_INDEXES,
+            "lock_waits": _PG_LOCK_WAITS,
         }
         for name, sql in queries.items():
             result = self.db_client.execute_query(sql)
@@ -485,19 +731,34 @@ def _format_report(self, data: dict[str, Any]) -> str:
         parts = [f"DATABASE PERFORMANCE REPORT ({db_type.upper()})\n{'=' * 60}\n"]
 
         for section_name, section_data in data.items():
-            if section_name == "db_type":
+            if section_name in ("db_type", "snap_range", "sample_range"):
                 continue
             parts.append(f"\n--- {section_name.upper().replace('_', ' ')} ---")
             if isinstance(section_data, dict) and "error" in section_data:
                 parts.append(f"  ERROR: {section_data['error']}")
+            elif section_name == "execution_plans" and isinstance(section_data, list):
+                for plan in section_data:
+                    parts.append(f"\n  PLAN FOR sql_id={plan.get('sql_id', '?')}:")
+                    for step in plan.get("steps", [])[:20]:
+                        parts.append(f"    {_format_row(step)}")
             elif isinstance(section_data, list):
                 if not section_data:
                     parts.append("  (no data)")
                 else:
-                    for i, row in enumerate(section_data[:15]):
+                    limit = (
+                        25
+                        if section_name
+                        in (
+                            "existing_indexes",
+                            "stale_statistics",
+                            "stale_stats_vacuum",
+                        )
+                        else 15
+                    )
+                    for i, row in enumerate(section_data[:limit]):
                         parts.append(f"  [{i + 1}] {_format_row(row)}")
-                    if len(section_data) > 15:
-                        parts.append(f"  ... and {len(section_data) - 15} more rows")
+                    if len(section_data) > limit:
+                        parts.append(f"  ... and {len(section_data) - limit} more rows")
             else:
                 parts.append(f"  {section_data}")
 

From 1aa59c1ad1faaf07a75912cf8ccb9b889c9c6528 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 08:30:33 +0000
Subject: [PATCH 10/19] Add Session/Lock Monitor and SQL Tuning Advisor tabs

Session/Lock Monitor (session_monitor.py):
- Active sessions view (v$session / pg_stat_activity)
- Blocking lock tree with recursive hierarchy (CONNECT BY for Oracle,
  recursive CTE for PostgreSQL)
- Lock details (v$lock / pg_locks with object names)
- Long-running queries (>5s threshold)
- Wait event chains
- Kill/cancel session UI (ALTER SYSTEM KILL SESSION for Oracle,
  pg_cancel_backend/pg_terminate_backend for PostgreSQL)

SQL Tuning Advisor (sql_tuning_advisor.py):
- Paste any SQL, runs EXPLAIN PLAN (Oracle) or EXPLAIN (PostgreSQL)
- Extracts tables from plan, collects per-table metadata:
  column stats, existing indexes, table stats, clustering factor
- PostgreSQL: optional EXPLAIN ANALYZE with actual execution stats
- LLM prompt requires step-by-step plan analysis, root cause,
  specific CREATE INDEX statements, SQL rewrite suggestions,
  stats maintenance commands, and numbered action plan

Updated app.py with two new tabs in the UI.
---
 tools/pg-assistant/app.py                | 211 ++++++++++-
 tools/pg-assistant/session_monitor.py    | 321 +++++++++++++++++
 tools/pg-assistant/sql_tuning_advisor.py | 440 +++++++++++++++++++++++
 3 files changed, 970 insertions(+), 2 deletions(-)
 create mode 100644 tools/pg-assistant/session_monitor.py
 create mode 100644 tools/pg-assistant/sql_tuning_advisor.py

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index 7073f9d..93429e8 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -20,7 +20,9 @@
 )
 from llm_client import LLMClient
 from profile_manager import ProfileManager
+from session_monitor import SessionMonitor
 from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError
+from sql_tuning_advisor import SQLTuningAdvisor
 
 # ---------------------------------------------------------------------------
 # Page config
@@ -261,8 +263,24 @@ def _connected_db_type() -> str:
 # ---------------------------------------------------------------------------
 # Tabs
 # ---------------------------------------------------------------------------
-tab_query, tab_schema, tab_monitor, tab_analyse, tab_history = st.tabs(
-    ["💬 Query", "📋 Schema", "📡 Auto Monitor", "📊 Auto Analyse", "📜 History"]
+(
+    tab_query,
+    tab_schema,
+    tab_monitor,
+    tab_analyse,
+    tab_sessions,
+    tab_tuning,
+    tab_history,
+) = st.tabs(
+    [
+        "💬 Query",
+        "📋 Schema",
+        "📡 Auto Monitor",
+        "📊 Auto Analyse",
+        "🔒 Sessions & Locks",
+        "🔧 SQL Tuning Advisor",
+        "📜 History",
+    ]
 )
 
 # ---- Query tab ------------------------------------------------------------
@@ -843,6 +861,195 @@ def _connected_db_type() -> str:
                 with st.expander("📄 Parsed Report Text"):
                     st.text(last["report_text"][:5000])
 
+# ---- Sessions & Locks tab -------------------------------------------------
+with tab_sessions:
+    st.subheader("🔒 Session & Lock Monitor")
+
+    if not (st.session_state.db_client and st.session_state.db_client.is_connected):
+        st.warning("Connect to a database first.")
+    else:
+        db_client = st.session_state.db_client
+        monitor = SessionMonitor(db_client)
+        is_oracle = db_client.db_type == DB_TYPE_ORACLE
+
+        sess_view = st.radio(
+            "View",
+            [
+                "Active Sessions",
+                "Blocking Lock Tree",
+                "Lock Details",
+                "Long-Running Queries",
+                "Wait Events",
+            ],
+            horizontal=True,
+            key="sess_view",
+        )
+
+        if st.button("🔄 Refresh", key="sess_refresh"):
+            st.session_state["_sess_data"] = None
+
+        # Fetch data based on selected view
+        with st.spinner("Querying sessions..."):
+            if sess_view == "Active Sessions":
+                result = monitor.get_active_sessions()
+            elif sess_view == "Blocking Lock Tree":
+                result = monitor.get_blocking_tree()
+            elif sess_view == "Lock Details":
+                result = monitor.get_lock_details()
+            elif sess_view == "Long-Running Queries":
+                result = monitor.get_long_running()
+            else:
+                result = monitor.get_wait_events()
+
+        if "error" in result:
+            st.error(result["error"])
+        else:
+            rows = result.get("rows", [])
+            if rows:
+                st.caption(f"{len(rows)} row(s)")
+                st.dataframe(
+                    pd.DataFrame(rows),
+                    use_container_width=True,
+                    hide_index=True,
+                )
+
+                # Kill session UI
+                st.divider()
+                st.subheader("Kill / Cancel Session")
+                st.warning(
+                    "Use with caution. This will terminate the selected session."
+                )
+                kcol1, kcol2, kcol3 = st.columns([2, 2, 2])
+
+                if is_oracle:
+                    with kcol1:
+                        kill_sid = st.number_input(
+                            "SID", min_value=1, step=1, key="kill_sid"
+                        )
+                    with kcol2:
+                        kill_serial = st.number_input(
+                            "Serial#", min_value=1, step=1, key="kill_serial"
+                        )
+                    with kcol3:
+                        if st.button(
+                            "⚠️ Kill Session (Oracle)",
+                            type="primary",
+                            key="kill_ora",
+                        ):
+                            kill_result = monitor.kill_session(kill_sid, kill_serial)
+                            if kill_result.get("success"):
+                                st.success(f"Session {kill_sid},{kill_serial} killed.")
+                            else:
+                                st.error(kill_result.get("error", "Kill failed"))
+                else:
+                    with kcol1:
+                        kill_pid = st.number_input(
+                            "PID", min_value=1, step=1, key="kill_pid"
+                        )
+                    with kcol2:
+                        kill_force = st.checkbox(
+                            "Force terminate (pg_terminate_backend)",
+                            key="kill_force",
+                        )
+                    with kcol3:
+                        label = "⚠️ Terminate Backend" if kill_force else "Cancel Query"
+                        if st.button(label, type="primary", key="kill_pg"):
+                            kill_result = monitor.kill_session(
+                                kill_pid, force=kill_force
+                            )
+                            if "error" in kill_result:
+                                st.error(kill_result["error"])
+                            else:
+                                st.success(
+                                    f"PID {kill_pid} "
+                                    f"{'terminated' if kill_force else 'cancel sent'}."
+                                )
+            else:
+                st.info("No sessions/locks found for this view.")
+
+# ---- SQL Tuning Advisor tab -----------------------------------------------
+with tab_tuning:
+    st.subheader("🔧 SQL Tuning Advisor")
+    st.markdown(
+        "Paste a SQL statement to get its **execution plan**, table metadata, "
+        "and **LLM-powered tuning recommendations** (index suggestions, "
+        "SQL rewrites, stats maintenance)."
+    )
+
+    if not (st.session_state.db_client and st.session_state.db_client.is_connected):
+        st.warning("Connect to a database first.")
+    elif not st.session_state.llm_client:
+        st.warning("Configure Ollama settings and connect first.")
+    else:
+        db_client = st.session_state.db_client
+        llm_client = st.session_state.llm_client
+        is_oracle = db_client.db_type == DB_TYPE_ORACLE
+
+        tune_sql = st.text_area(
+            "SQL to tune",
+            height=200,
+            placeholder=(
+                "SELECT o.order_id, c.customer_name, p.product_name\n"
+                "FROM orders o\n"
+                "JOIN customers c ON o.customer_id = c.id\n"
+                "JOIN products p ON o.product_id = p.id\n"
+                "WHERE o.order_date > '2024-01-01'\n"
+                "ORDER BY o.order_date DESC"
+            ),
+            key="tune_sql_input",
+        )
+
+        tcol1, tcol2 = st.columns(2)
+        with tcol1:
+            if not is_oracle:
+                run_analyze = st.checkbox(
+                    "Use EXPLAIN ANALYZE (executes the query — use with caution)",
+                    key="tune_analyze",
+                )
+            else:
+                run_analyze = False
+
+        with tcol2:
+            tune_btn = st.button(
+                "🔧 Analyse & Tune",
+                use_container_width=True,
+                type="primary",
+                key="tune_btn",
+            )
+
+        if tune_btn and tune_sql.strip():
+            advisor = SQLTuningAdvisor(db_client=db_client, llm_client=llm_client)
+            with st.spinner(
+                "Running EXPLAIN, collecting metadata, analysing with LLM..."
+            ):
+                result = advisor.analyse_sql(tune_sql.strip(), run_analyze=run_analyze)
+
+            if result.get("error"):
+                st.error(result["error"])
+            else:
+                # Show execution plan
+                plan_text = result.get("plan_text", "")
+                if plan_text:
+                    st.subheader("Execution Plan")
+                    st.code(plan_text, language="text")
+
+                # Show LLM analysis
+                analysis = result.get("analysis", "")
+                if analysis:
+                    st.divider()
+                    st.subheader("AI Tuning Recommendations")
+                    st.markdown(analysis)
+
+                # Show raw metadata in expander
+                metadata = result.get("metadata", {})
+                table_meta = metadata.get("table_metadata", "")
+                if table_meta:
+                    with st.expander("📋 Table Metadata (columns, indexes, stats)"):
+                        st.text(table_meta[:8000])
+
+        elif tune_btn:
+            st.warning("Please enter a SQL statement to tune.")
+
 # ---- History tab ----------------------------------------------------------
 with tab_history:
     st.subheader("Query History")
diff --git a/tools/pg-assistant/session_monitor.py b/tools/pg-assistant/session_monitor.py
new file mode 100644
index 0000000..f025b63
--- /dev/null
+++ b/tools/pg-assistant/session_monitor.py
@@ -0,0 +1,321 @@
+"""Session and lock monitoring for Oracle and PostgreSQL.
+
+Provides live views of:
+- Active sessions and their current SQL
+- Blocking lock trees (who is blocking whom)
+- Wait chains
+- Long-running queries
+"""
+
+import logging
+from typing import Any
+
+from db_client import BaseDBClient, DB_TYPE_ORACLE
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Oracle session / lock queries
+# ---------------------------------------------------------------------------
+_ORA_ACTIVE_SESSIONS = """
+    SELECT
+        s.sid,
+        s.serial#  AS serial_num,
+        s.username,
+        s.status,
+        s.osuser,
+        s.machine,
+        s.program,
+        s.wait_class,
+        s.event,
+        s.seconds_in_wait,
+        s.sql_id,
+        SUBSTR(q.sql_text, 1, 200) AS sql_text,
+        s.blocking_session,
+        s.blocking_session_status
+    FROM v$session s
+    LEFT JOIN v$sql q ON s.sql_id = q.sql_id AND s.sql_child_number = q.child_number
+    WHERE s.type = 'USER'
+        AND s.status = 'ACTIVE'
+    ORDER BY s.seconds_in_wait DESC
+"""  # ACTIVE user sessions plus 200 chars of their SQL, longest wait first
+
+_ORA_BLOCKING_TREE = """
+    SELECT
+        LPAD(' ', 2 * (LEVEL - 1)) || s.sid || ',' || s.serial# AS session_id,
+        s.username,
+        s.status,
+        s.sql_id,
+        SUBSTR(q.sql_text, 1, 200) AS sql_text,
+        s.event,
+        s.seconds_in_wait,
+        s.blocking_session,
+        l.type AS lock_type,
+        DECODE(l.lmode,
+            0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X',
+            4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.lmode) AS lock_mode,
+        DECODE(l.request,
+            0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X',
+            4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.request) AS lock_request
+    FROM v$session s
+    LEFT JOIN v$sql q ON s.sql_id = q.sql_id AND s.sql_child_number = q.child_number
+    LEFT JOIN v$lock l ON s.sid = l.sid AND l.block > 0
+    START WITH s.blocking_session IS NOT NULL
+        AND NOT EXISTS (
+            SELECT 1 FROM v$session s2
+            WHERE s2.sid = s.blocking_session
+                AND s2.blocking_session IS NOT NULL
+        )
+    CONNECT BY PRIOR s.sid = s.blocking_session
+    ORDER SIBLINGS BY s.seconds_in_wait DESC
+"""  # CONNECT BY tree of blocked sessions; session_id is indented by LEVEL
+
+_ORA_LOCK_DETAILS = """
+    SELECT
+        l.sid,
+        s.serial# AS serial_num,
+        s.username,
+        l.type AS lock_type,
+        DECODE(l.lmode,
+            0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X',
+            4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.lmode) AS lock_mode,
+        DECODE(l.request,
+            0, 'None', 1, 'Null', 2, 'Row-S', 3, 'Row-X',
+            4, 'Share', 5, 'S/Row-X', 6, 'Exclusive', l.request) AS lock_request,
+        l.block,
+        o.object_name,
+        o.object_type,
+        s.sql_id,
+        SUBSTR(q.sql_text, 1, 200) AS sql_text
+    FROM v$lock l
+    JOIN v$session s ON l.sid = s.sid
+    LEFT JOIN dba_objects o ON l.id1 = o.object_id
+    LEFT JOIN v$sql q ON s.sql_id = q.sql_id AND s.sql_child_number = q.child_number
+    WHERE l.type NOT IN ('AE', 'PS')
+        AND (l.block > 0 OR l.request > 0)
+    ORDER BY l.block DESC, l.request DESC
+"""  # locks that block others or are still requested; AE and PS types excluded
+
+_ORA_LONG_RUNNING = """
+    SELECT * FROM (
+        SELECT
+            s.sid,
+            s.serial# AS serial_num,
+            s.username,
+            s.sql_id,
+            SUBSTR(q.sql_text, 1, 200) AS sql_text,
+            ROUND(s.last_call_et) AS running_sec,
+            s.event,
+            s.wait_class,
+            s.program,
+            s.machine
+        FROM v$session s
+        LEFT JOIN v$sql q ON s.sql_id = q.sql_id
+            AND s.sql_child_number = q.child_number
+        WHERE s.type = 'USER'
+            AND s.status = 'ACTIVE'
+            AND s.last_call_et > 5
+        ORDER BY s.last_call_et DESC
+    ) WHERE ROWNUM <= 30
+"""  # at most 30 ACTIVE user sessions with last_call_et > 5, longest first
+
+_ORA_WAIT_CHAINS = """
+    SELECT
+        s.sid,
+        s.serial# AS serial_num,
+        s.username,
+        s.event,
+        s.wait_class,
+        s.seconds_in_wait,
+        s.blocking_session,
+        s.sql_id
+    FROM v$session s
+    WHERE s.type = 'USER'
+        AND s.wait_class != 'Idle'
+        AND s.seconds_in_wait > 1
+    ORDER BY s.seconds_in_wait DESC
+"""  # user sessions in a non-Idle wait class with seconds_in_wait > 1
+
+# ---------------------------------------------------------------------------
+# PostgreSQL session / lock queries
+# ---------------------------------------------------------------------------
+_PG_ACTIVE_SESSIONS = """
+    SELECT
+        pid,
+        usename,
+        datname,
+        client_addr::text,
+        application_name,
+        state,
+        wait_event_type,
+        wait_event,
+        LEFT(query, 300) AS query,
+        ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 1) AS running_sec,
+        ROUND(EXTRACT(EPOCH FROM (now() - backend_start))::numeric, 0) AS session_age_sec
+    FROM pg_stat_activity
+    WHERE pid != pg_backend_pid()
+        AND state != 'idle'
+    ORDER BY query_start
+"""  # every backend except this one whose state is not 'idle', oldest query first
+
+_PG_BLOCKING_TREE = """
+    WITH RECURSIVE lock_tree AS (
+        SELECT
+            blocked.pid AS blocked_pid,
+            blocked.usename AS blocked_user,
+            LEFT(blocked.query, 200) AS blocked_query,
+            blocked.wait_event_type,
+            blocked.wait_event,
+            blocking.pid AS blocking_pid,
+            blocking.usename AS blocking_user,
+            LEFT(blocking.query, 200) AS blocking_query,
+            1 AS depth
+        FROM pg_stat_activity blocked
+        JOIN pg_locks bl ON bl.pid = blocked.pid
+        JOIN pg_locks kl ON kl.locktype = bl.locktype
+            AND kl.database IS NOT DISTINCT FROM bl.database
+            AND kl.relation IS NOT DISTINCT FROM bl.relation
+            AND kl.page IS NOT DISTINCT FROM bl.page
+            AND kl.tuple IS NOT DISTINCT FROM bl.tuple
+            AND kl.virtualxid IS NOT DISTINCT FROM bl.virtualxid
+            AND kl.transactionid IS NOT DISTINCT FROM bl.transactionid
+            AND kl.classid IS NOT DISTINCT FROM bl.classid
+            AND kl.objid IS NOT DISTINCT FROM bl.objid
+            AND kl.objsubid IS NOT DISTINCT FROM bl.objsubid
+            AND kl.pid != bl.pid
+        JOIN pg_stat_activity blocking ON kl.pid = blocking.pid
+        WHERE NOT bl.granted AND kl.granted
+    )
+    SELECT DISTINCT
+        blocked_pid,
+        blocked_user,
+        blocked_query,
+        wait_event_type,
+        wait_event,
+        blocking_pid,
+        blocking_user,
+        blocking_query,
+        depth
+    FROM lock_tree
+    ORDER BY blocking_pid, depth
+"""  # pairs an ungranted lock with granted locks on the same object; depth is always 1
+
+_PG_LOCK_DETAILS = """
+    SELECT
+        l.pid,
+        a.usename,
+        l.locktype,
+        l.mode,
+        l.granted,
+        l.relation::regclass::text AS locked_relation,
+        LEFT(a.query, 200) AS query,
+        a.state,
+        ROUND(EXTRACT(EPOCH FROM (now() - a.query_start))::numeric, 1) AS query_sec
+    FROM pg_locks l
+    JOIN pg_stat_activity a ON l.pid = a.pid
+    WHERE l.pid != pg_backend_pid()
+        AND l.relation IS NOT NULL
+    ORDER BY l.granted, a.query_start
+"""  # relation-level locks of other backends, ungranted ones first
+
+_PG_LONG_RUNNING = """
+    SELECT
+        pid,
+        usename,
+        datname,
+        LEFT(query, 300) AS query,
+        state,
+        wait_event_type,
+        wait_event,
+        ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 1) AS running_sec,
+        application_name,
+        client_addr::text
+    FROM pg_stat_activity
+    WHERE pid != pg_backend_pid()
+        AND state = 'active'
+        AND query_start < now() - interval '5 seconds'
+    ORDER BY query_start
+    LIMIT 30
+"""  # at most 30 'active' backends whose query started over 5 seconds ago
+
+_PG_WAIT_EVENTS = """
+    SELECT
+        pid,
+        usename,
+        wait_event_type,
+        wait_event,
+        state,
+        LEFT(query, 200) AS query,
+        ROUND(EXTRACT(EPOCH FROM (now() - query_start))::numeric, 1) AS running_sec
+    FROM pg_stat_activity
+    WHERE pid != pg_backend_pid()
+        AND wait_event IS NOT NULL
+        AND state != 'idle'
+    ORDER BY query_start
+"""  # non-idle backends that currently report a wait_event
+
+
+# ---------------------------------------------------------------------------
+# Kill session statements: str.format templates, values are inserted unescaped
+# ---------------------------------------------------------------------------
+_ORA_KILL_SESSION = "ALTER SYSTEM KILL SESSION '{sid},{serial_num}' IMMEDIATE"
+
+_PG_CANCEL_QUERY = "SELECT pg_cancel_backend({pid})"
+_PG_TERMINATE_BACKEND = "SELECT pg_terminate_backend({pid})"
+
+
+# ---------------------------------------------------------------------------
+# SessionMonitor class
+# ---------------------------------------------------------------------------
+class SessionMonitor:
+    """Collects session and lock information from Oracle or PostgreSQL."""
+
+    def __init__(self, db_client: BaseDBClient) -> None:
+        """Store the client that every monitoring query is sent through."""
+        self.db_client = db_client
+
+    def _run(self, oracle_sql: str, pg_sql: str) -> dict[str, Any]:
+        """Execute the query variant that matches the connected database."""
+        is_oracle = self.db_client.db_type == DB_TYPE_ORACLE
+        return self.db_client.execute_query(oracle_sql if is_oracle else pg_sql)
+
+    def get_active_sessions(self) -> dict[str, Any]:
+        """Return active (non-idle) sessions."""
+        return self._run(_ORA_ACTIVE_SESSIONS, _PG_ACTIVE_SESSIONS)
+
+    def get_blocking_tree(self) -> dict[str, Any]:
+        """Return blocking lock tree (who blocks whom)."""
+        return self._run(_ORA_BLOCKING_TREE, _PG_BLOCKING_TREE)
+
+    def get_lock_details(self) -> dict[str, Any]:
+        """Return detailed lock information."""
+        return self._run(_ORA_LOCK_DETAILS, _PG_LOCK_DETAILS)
+
+    def get_long_running(self) -> dict[str, Any]:
+        """Return long-running queries (>5 seconds)."""
+        return self._run(_ORA_LONG_RUNNING, _PG_LONG_RUNNING)
+
+    def get_wait_events(self) -> dict[str, Any]:
+        """Return sessions currently waiting."""
+        return self._run(_ORA_WAIT_CHAINS, _PG_WAIT_EVENTS)
+
+    def kill_session(
+        self, pid_or_sid: int, serial_num: int = 0, force: bool = False
+    ) -> dict[str, Any]:
+        """Kill/cancel a session.
+
+        Oracle: ALTER SYSTEM KILL SESSION 'sid,serial#' IMMEDIATE
+        PostgreSQL: pg_cancel_backend (soft) or pg_terminate_backend (force)
+
+        ``force`` only affects PostgreSQL. The identifiers are coerced with
+        ``int()`` before they are formatted into the SQL text, so a value that
+        is not integer-like raises ValueError/TypeError and is never executed.
+        """
+        # The kill templates are filled in with str.format, not bind
+        # parameters, so only plain integers may be substituted into them.
+        target = int(pid_or_sid)
+        if self.db_client.db_type == DB_TYPE_ORACLE:
+            sql = _ORA_KILL_SESSION.format(sid=target, serial_num=int(serial_num))
+            return self.db_client.execute_statement(sql)
+        template = _PG_TERMINATE_BACKEND if force else _PG_CANCEL_QUERY
+        return self.db_client.execute_query(template.format(pid=target))
diff --git a/tools/pg-assistant/sql_tuning_advisor.py b/tools/pg-assistant/sql_tuning_advisor.py
new file mode 100644
index 0000000..300ebfb
--- /dev/null
+++ b/tools/pg-assistant/sql_tuning_advisor.py
@@ -0,0 +1,440 @@
+"""SQL Tuning Advisor for Oracle and PostgreSQL.
+
+Accepts a SQL statement, runs EXPLAIN PLAN, collects relevant metadata
+(table DDL, existing indexes, stats), and uses the LLM to generate
+specific tuning recommendations.
+"""
+
+import logging
+from typing import Any
+
+from db_client import BaseDBClient, DB_TYPE_ORACLE
+from llm_client import LLMClient
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Oracle EXPLAIN helpers
+# ---------------------------------------------------------------------------
+_ORA_EXPLAIN_PLAN = "EXPLAIN PLAN FOR {sql}"  # {sql} is inserted verbatim
+
+_ORA_DISPLAY_PLAN = """
+    SELECT plan_table_output
+    FROM TABLE(DBMS_XPLAN.DISPLAY('PLAN_TABLE', NULL, 'ALL'))
+"""  # one row per formatted plan line, read back from PLAN_TABLE
+
+_ORA_TABLE_DDL = """
+    SELECT
+        column_name,
+        data_type,
+        data_length,
+        data_precision,
+        nullable,
+        num_distinct,
+        num_nulls,
+        density,
+        histogram
+    FROM all_tab_columns
+    WHERE owner = '{owner}'
+        AND table_name = '{table_name}'
+    ORDER BY column_id
+"""  # ALL_TAB_COLUMNS has both the definition and the statistics columns
+
+_ORA_TABLE_INDEXES = """
+    SELECT
+        i.index_name,
+        i.index_type,
+        i.uniqueness,
+        i.status,
+        i.num_rows,
+        i.distinct_keys,
+        i.clustering_factor,
+        TO_CHAR(i.last_analyzed, 'YYYY-MM-DD HH24:MI') AS last_analyzed,
+        LISTAGG(c.column_name, ', ')
+            WITHIN GROUP (ORDER BY c.column_position) AS columns
+    FROM all_indexes i
+    JOIN all_ind_columns c
+        ON i.index_name = c.index_name AND i.owner = c.index_owner
+    WHERE i.table_owner = '{owner}'
+        AND i.table_name = '{table_name}'
+    GROUP BY i.index_name, i.index_type, i.uniqueness, i.status,
+             i.num_rows, i.distinct_keys, i.clustering_factor, i.last_analyzed
+    ORDER BY i.index_name
+"""  # one row per index; its columns are joined into one comma-separated list
+
+_ORA_TABLE_STATS = """
+    SELECT
+        table_name,
+        num_rows,
+        blocks,
+        avg_row_len,
+        TO_CHAR(last_analyzed, 'YYYY-MM-DD HH24:MI') AS last_analyzed,
+        stale_stats,
+        sample_size
+    FROM all_tab_statistics
+    WHERE owner = '{owner}'
+        AND table_name = '{table_name}'
+"""  # optimizer statistics for the table, including the stale_stats flag
+
+_ORA_EXTRACT_TABLES = """
+    SELECT DISTINCT
+        p.object_owner AS owner,
+        p.object_name AS table_name
+    FROM plan_table p
+    WHERE p.object_type = 'TABLE' AND p.object_owner IS NOT NULL
+        AND p.plan_id = (SELECT MAX(p2.plan_id) FROM plan_table p2)
+"""  # newest plan only: PLAN_TABLE also keeps rows of earlier EXPLAIN PLAN calls
+
+# ---------------------------------------------------------------------------
+# PostgreSQL EXPLAIN helpers
+# ---------------------------------------------------------------------------
+_PG_EXPLAIN = "EXPLAIN (ANALYZE false, COSTS true, FORMAT TEXT) {sql}"
+_PG_EXPLAIN_ANALYZE = (
+    "EXPLAIN (ANALYZE true, COSTS true, BUFFERS true, FORMAT TEXT) {sql}"
+)  # ANALYZE true: the statement is actually executed
+
+_PG_TABLE_COLUMNS = """
+    SELECT
+        column_name,
+        data_type,
+        is_nullable,
+        column_default,
+        character_maximum_length
+    FROM information_schema.columns
+    WHERE table_schema = '{schema}'
+        AND table_name = '{table_name}'
+    ORDER BY ordinal_position
+"""  # column definitions of one table, in ordinal order
+
+_PG_TABLE_INDEXES = """
+    SELECT
+        indexname,
+        indexdef
+    FROM pg_indexes
+    WHERE schemaname = '{schema}'
+        AND tablename = '{table_name}'
+    ORDER BY indexname
+"""  # name and definition (indexdef) of every index on the table
+
+_PG_TABLE_STATS = """
+    SELECT
+        relname,
+        n_live_tup,
+        n_dead_tup,
+        seq_scan,
+        seq_tup_read,
+        idx_scan,
+        idx_tup_fetch,
+        last_vacuum::text,
+        last_autovacuum::text,
+        last_analyze::text,
+        last_autoanalyze::text
+    FROM pg_stat_user_tables
+    WHERE schemaname = '{schema}'
+        AND relname = '{table_name}'
+"""  # tuple counts, scan counters and last (auto)vacuum/analyze times
+
+_PG_COLUMN_STATS = """
+    SELECT
+        attname AS column_name,
+        n_distinct,
+        null_frac,
+        avg_width,
+        correlation
+    FROM pg_stats
+    WHERE schemaname = '{schema}'
+        AND tablename = '{table_name}'
+    ORDER BY attname
+"""  # per-column rows from pg_stats for the table
+
+# ---------------------------------------------------------------------------
+# LLM prompt
+# ---------------------------------------------------------------------------
+TUNING_SYSTEM_PROMPT = (
+    "You are a senior DBA and SQL tuning expert. You have been given a SQL "
+    "statement, its execution plan, table structure, existing indexes, and "
+    "column/table statistics.\n\n"
+    "Produce the following sections:\n\n"
+    "## Execution Plan Analysis\n"
+    "Walk through the plan step by step. Identify:\n"
+    "- Full table scans (and whether they are justified)\n"
+    "- Nested loop joins vs hash joins (and whether the choice is optimal)\n"
+    "- Sort operations that could be avoided\n"
+    "- High-cost steps\n"
+    "- Estimated vs actual row discrepancies (if ANALYZE data available)\n\n"
+    "## Root Cause\n"
+    "Explain WHY the query may be slow. Reference specific plan steps, "
+    "missing indexes, stale statistics, or suboptimal SQL patterns.\n\n"
+    "## Recommended Indexes\n"
+    "For each suggested index:\n"
+    "- Provide the exact `CREATE INDEX` statement\n"
+    "- Explain which plan step it improves\n"
+    "- Note if a composite index is better than multiple single-column indexes\n\n"
+    "## SQL Rewrite Suggestions\n"
+    "If the SQL can be rewritten for better performance:\n"
+    "- Show the rewritten SQL in a code block\n"
+    "- Explain what changed and why it is faster\n"
+    "- Consider: subquery elimination, EXISTS vs IN, join reordering, "
+    "predicate pushdown, avoiding SELECT *\n\n"
+    "## Statistics & Maintenance\n"
+    "If statistics are stale or missing, provide exact commands:\n"
+    "- Oracle: `EXEC DBMS_STATS.GATHER_TABLE_STATS(...)` with proper params\n"
+    "- PostgreSQL: `ANALYZE table_name;` or `VACUUM ANALYZE table_name;`\n\n"
+    "## Summary Action Plan\n"
+    "Numbered list of actions in priority order. Each with:\n"
+    "- The exact SQL command to run\n"
+    "- Expected improvement\n\n"
+    "IMPORTANT: Be SPECIFIC. Reference table names, column names, and index "
+    "names. Provide copy-paste-ready SQL. Use markdown with code blocks."
+)  # one string for both databases; handed to the LLM as the system prompt
+
+
+# ---------------------------------------------------------------------------
+# SQLTuningAdvisor class
+# ---------------------------------------------------------------------------
+class SQLTuningAdvisor:
+    """Analyses a SQL statement and provides tuning recommendations.
+
+    The advisor runs EXPLAIN through ``db_client``, collects column, index and
+    statistics metadata for the tables involved, and sends the result to
+    ``llm_client`` together with ``TUNING_SYSTEM_PROMPT``.
+    """
+
+    def __init__(
+        self,
+        db_client: BaseDBClient,
+        llm_client: LLMClient,
+    ) -> None:
+        """Store the database and LLM clients used by every analysis."""
+        self.db_client = db_client
+        self.llm_client = llm_client
+
+    def analyse_sql(self, sql: str, run_analyze: bool = False) -> dict[str, Any]:
+        """Run EXPLAIN on the SQL, collect metadata, and get LLM recommendations.
+
+        Args:
+            sql: The SQL statement to analyse. Surrounding whitespace and
+                trailing semicolons are removed first: the text is embedded
+                in an EXPLAIN statement, and a trailing ";" makes Oracle
+                reject the statement.
+            run_analyze: If True, use EXPLAIN ANALYZE (PostgreSQL) which
+                actually executes the query. Use with caution on write queries.
+                Not used for Oracle.
+
+        Returns:
+            A dict with "plan_text", "metadata" and "analysis". When EXPLAIN
+            cannot run (or *sql* is empty) it also carries "error" and the
+            other values are empty.
+        """
+        statement = sql.strip().rstrip("; \t\r\n")
+        if not statement:
+            return self._failure("No SQL statement provided.")
+        if self.db_client.db_type == DB_TYPE_ORACLE:
+            return self._analyse_oracle(statement)
+        return self._analyse_postgresql(statement, run_analyze)
+
+    @staticmethod
+    def _failure(message: str) -> dict[str, Any]:
+        """Build the error-shaped result used when no plan could be produced."""
+        return {"error": message, "plan_text": "", "metadata": {}, "analysis": ""}
+
+    def _append_rows(self, out: list[str], heading: str, query: str) -> None:
+        """Run *query*; if it returns rows, append *heading* and one line per row.
+
+        A result containing an "error" key, or no rows, adds nothing, so a
+        failed metadata lookup never aborts the analysis.
+        """
+        result = self.db_client.execute_query(query)
+        if "error" not in result and result.get("rows"):
+            out.append(heading)
+            out.extend(f"  {_fmt_row(row)}" for row in result["rows"])
+
+    def _finish(self, sql: str, sections: dict[str, str]) -> dict[str, Any]:
+        """Send the collected sections to the LLM and build the success result."""
+        analysis = self._get_llm_analysis(self._build_prompt(sql, sections))
+        return {
+            "plan_text": sections["execution_plan"],
+            "metadata": sections,
+            "analysis": analysis,
+        }
+
+    # -- Oracle ---------------------------------------------------------------
+
+    def _analyse_oracle(self, sql: str) -> dict[str, Any]:
+        """Explain *sql* on Oracle, gather table metadata and ask the LLM.
+
+        EXPLAIN PLAN is issued with ``execute_statement``; the plan text and
+        the tables it touches are then read back with ``execute_query``.
+        Metadata is collected for at most 10 tables.
+        """
+        # 1. Run EXPLAIN PLAN
+        explain_result = self.db_client.execute_statement(
+            _ORA_EXPLAIN_PLAN.format(sql=sql)
+        )
+        if not explain_result.get("success"):
+            return self._failure(
+                f"EXPLAIN PLAN failed: {explain_result.get('error', '')}"
+            )
+
+        # 2. Get the plan output. A failed read leaves the plan empty; a line
+        # that comes back as None is kept as "" so str.join() cannot fail.
+        plan_result = self.db_client.execute_query(_ORA_DISPLAY_PLAN)
+        plan_rows = [] if "error" in plan_result else plan_result.get("rows", [])
+        plan_text = "\n".join(
+            str(row.get("plan_table_output") or "") for row in plan_rows
+        )
+
+        # 3. Extract tables from plan and collect metadata
+        tables_result = self.db_client.execute_query(_ORA_EXTRACT_TABLES)
+        tables = [] if "error" in tables_result else tables_result.get("rows", [])
+
+        metadata_parts: list[str] = []
+        for tbl in tables[:10]:
+            owner = tbl.get("owner", "")
+            table_name = tbl.get("table_name", "")
+            if not owner or not table_name:
+                continue
+            label = f"{owner}.{table_name}"
+            for heading, query_template in (
+                (f"\nTABLE: {label} COLUMNS:", _ORA_TABLE_DDL),
+                (f"\nINDEXES ON {label}:", _ORA_TABLE_INDEXES),
+                (f"\nSTATISTICS FOR {label}:", _ORA_TABLE_STATS),
+            ):
+                query = query_template.format(owner=owner, table_name=table_name)
+                self._append_rows(metadata_parts, heading, query)
+
+        # 4. Build prompt and get LLM analysis
+        sections = {
+            "execution_plan": plan_text,
+            "table_metadata": "\n".join(metadata_parts),
+        }
+        return self._finish(sql, sections)
+
+    # -- PostgreSQL -----------------------------------------------------------
+
+    def _analyse_postgresql(
+        self, sql: str, run_analyze: bool = False
+    ) -> dict[str, Any]:
+        """Explain *sql* on PostgreSQL, gather table metadata and ask the LLM.
+
+        With *run_analyze* the EXPLAIN (ANALYZE, BUFFERS) variant is used,
+        which executes the statement. Tables are guessed from the SQL text
+        and metadata is collected for at most 10 of them.
+        """
+        # 1. Run EXPLAIN
+        template = _PG_EXPLAIN_ANALYZE if run_analyze else _PG_EXPLAIN
+        plan_result = self.db_client.execute_query(template.format(sql=sql))
+        if "error" in plan_result:
+            return self._failure(f"EXPLAIN failed: {plan_result['error']}")
+
+        # PostgreSQL EXPLAIN returns a single column
+        plan_text = "\n".join(
+            str(next(iter(row.values())) if row else "")
+            for row in plan_result.get("rows", [])
+        )
+
+        # 2. Extract table names from the SQL (simple heuristic)
+        tables = self._extract_pg_tables(sql)
+
+        # 3. Collect metadata for each table
+        metadata_parts: list[str] = []
+        for schema, table_name in tables[:10]:
+            label = f"{schema}.{table_name}"
+            for heading, query_template in (
+                (f"\nTABLE: {label} COLUMNS:", _PG_TABLE_COLUMNS),
+                (f"\nINDEXES ON {label}:", _PG_TABLE_INDEXES),
+                (f"\nTABLE STATS FOR {label}:", _PG_TABLE_STATS),
+                (f"\nCOLUMN STATS FOR {label}:", _PG_COLUMN_STATS),
+            ):
+                query = query_template.format(schema=schema, table_name=table_name)
+                self._append_rows(metadata_parts, heading, query)
+
+        # 4. Build prompt and get LLM analysis
+        sections = {
+            "execution_plan": plan_text,
+            "table_metadata": "\n".join(metadata_parts),
+        }
+        return self._finish(sql, sections)
+
+    def _extract_pg_tables(self, sql: str) -> list[tuple[str, str]]:
+        """Extract table names from SQL using simple keyword parsing.
+
+        This is a heuristic, not a SQL parser: it picks up the unquoted
+        identifier that follows each FROM or JOIN keyword. Names followed by
+        "(" are treated as function calls and skipped, a name without a
+        schema is assumed to live in "public", and for a name with more than
+        two dotted parts only the last two (schema.table) are used.
+
+        Returns list of (schema, table_name) tuples, de-duplicated, in order
+        of first appearance.
+        """
+        import re
+
+        tables: list[tuple[str, str]] = []
+        seen: set[tuple[str, str]] = set()
+
+        # \b keeps FROM/JOIN from matching inside longer words; the first
+        # lookahead forces the whole identifier to be consumed and the second
+        # rejects function calls such as generate_series(...).
+        pattern = (
+            r"\b(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_.]*)"
+            r"(?![a-zA-Z0-9_.])(?!\s*\()"
+        )
+        for match in re.finditer(pattern, sql, re.IGNORECASE):
+            full_name = match.group(1).lower()
+            # Skip subquery aliases and keywords
+            if full_name in ("select", "where", "lateral", "unnest"):
+                continue
+            parts = full_name.split(".")
+            if not all(parts):
+                # "schema." or "a..b": usually a quoted identifier cut short
+                continue
+            entry = (parts[-2] if len(parts) > 1 else "public", parts[-1])
+            if entry not in seen:
+                seen.add(entry)
+                tables.append(entry)
+
+        return tables
+
+    # -- Shared helpers -------------------------------------------------------
+
+    def _build_prompt(self, sql: str, sections: dict[str, str]) -> str:
+        """Assemble the user prompt from the SQL, the plan and table metadata.
+
+        An empty or missing plan is shown as "(not available)"; the metadata
+        section is left out entirely when nothing was collected.
+        """
+        plan = sections.get("execution_plan") or "(not available)"
+        parts = [
+            f"SQL STATEMENT TO TUNE:\n```sql\n{sql}\n```\n",
+            f"\nEXECUTION PLAN:\n```\n{plan}\n```\n",
+        ]
+        meta = sections.get("table_metadata", "")
+        if meta:
+            parts.append(f"\nTABLE METADATA (columns, indexes, statistics):\n{meta}\n")
+
+        return "\n".join(parts)
+
+    def _get_llm_analysis(self, prompt: str) -> str:
+        """Return the LLM's recommendations, or a failure message as text.
+
+        ConnectionError and RuntimeError from the client are reported in the
+        returned string instead of being raised.
+        """
+        try:
+            return self.llm_client.generate(
+                prompt=prompt,
+                system_prompt=TUNING_SYSTEM_PROMPT,
+            )
+        except (ConnectionError, RuntimeError) as exc:
+            return f"LLM analysis failed: {exc}"
+
+
+def _fmt_row(row: dict[str, Any]) -> str:
+    """Render *row* as ``key=value`` pairs joined by ", ".
+
+    Columns whose value is None are left out; key order follows the dict.
+    """
+    return ", ".join(
+        f"{column}={value}" for column, value in row.items() if value is not None
+    )

From 47ecc13b7b5d3142c3f02eca8dc8f0abe8ae55dd Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 08:42:52 +0000
Subject: [PATCH 11/19] Add Compare Snapshots with Plotly charts, enhanced
 best-practice analysis, exclude system queries, 500-char SQL text

---
 tools/pg-assistant/app.py              | 203 ++++++
 tools/pg-assistant/auto_analyse.py     | 396 ++++++++++-
 tools/pg-assistant/requirements.txt    |   1 +
 tools/pg-assistant/snapshot_compare.py | 889 +++++++++++++++++++++++++
 4 files changed, 1469 insertions(+), 20 deletions(-)
 create mode 100644 tools/pg-assistant/snapshot_compare.py

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index 93429e8..6621141 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -21,6 +21,7 @@
 from llm_client import LLMClient
 from profile_manager import ProfileManager
 from session_monitor import SessionMonitor
+from snapshot_compare import SnapshotComparator
 from sql_generator import SQLGenerationError, SQLGenerator, UnsafeSQLError
 from sql_tuning_advisor import SQLTuningAdvisor
 
@@ -64,6 +65,33 @@ def _connected_db_type() -> str:
     return ""
 
 
+def _render_comparison(result: dict) -> None:
+    """Draw a comparison result: delta table, then charts, then the LLM text.
+
+    Reads the "delta_table", "figures" and "analysis" keys of *result*; a
+    section whose value is missing or empty is not rendered at all.
+    """
+    if rows := result.get("delta_table", []):
+        st.markdown("### Delta Summary")
+        frame = pd.DataFrame(rows)
+        st.dataframe(frame, use_container_width=True, hide_index=True)
+
+    # Each figure entry is read as a mapping with "title" and "fig" keys.
+    if charts := result.get("figures", []):
+        st.markdown("### Visual Comparison")
+        for entry in charts:
+            caption = entry.get("title", "")
+            chart = entry.get("fig")
+            if chart is None:
+                continue  # entries without a figure object are skipped
+            st.markdown(f"**{caption}**")
+            st.plotly_chart(chart, use_container_width=True)
+
+    if commentary := result.get("analysis", ""):
+        st.markdown("### AI Comparison Analysis")
+        st.markdown(commentary)
+
+
 # ---------------------------------------------------------------------------
 # Sidebar -- connection & profile management
 # ---------------------------------------------------------------------------
@@ -268,6 +296,7 @@ def _connected_db_type() -> str:
     tab_schema,
     tab_monitor,
     tab_analyse,
+    tab_compare,
     tab_sessions,
     tab_tuning,
     tab_history,
@@ -277,6 +306,7 @@ def _connected_db_type() -> str:
         "📋 Schema",
         "📡 Auto Monitor",
         "📊 Auto Analyse",
+        "🔀 Compare Snapshots",
         "🔒 Sessions & Locks",
         "🔧 SQL Tuning Advisor",
         "📜 History",
@@ -1050,6 +1080,179 @@ def _connected_db_type() -> str:
         elif tune_btn:
             st.warning("Please enter a SQL statement to tune.")
 
+# ---- Compare Snapshots tab ------------------------------------------------
+with tab_compare:
+    st.subheader("Compare Two Snapshots")
+
+    if not st.session_state.db_client:
+        st.warning("Connect to a database first.")
+    else:
+        db_type = _connected_db_type()
+        comparator = SnapshotComparator(
+            st.session_state.db_client, st.session_state.llm_client
+        )
+
+        if db_type == DB_TYPE_ORACLE:
+            st.markdown(
+                "Select **two AWR snapshot ranges** to compare. "
+                "The tool will show delta metrics and charts."
+            )
+            # Load available snapshots
+            analyser_cmp = PerformanceAnalyser(
+                st.session_state.db_client, st.session_state.llm_client
+            )
+            snap_result = analyser_cmp.list_awr_snapshots()
+            if "error" in snap_result:
+                st.error(f"Cannot load snapshots: {snap_result['error']}")
+            else:
+                snaps = snap_result.get("rows", [])
+                if not snaps:
+                    st.info("No AWR snapshots found.")
+                else:
+                    snap_ids = sorted(
+                        {int(s["snap_id"]) for s in snaps if s.get("snap_id")}
+                    )
+                    snap_labels = {
+                        int(s["snap_id"]): (
+                            f"{s['snap_id']} - {s.get('end_interval_time', '')}"
+                        )
+                        for s in snaps
+                        if s.get("snap_id")
+                    }
+
+                    col_a, col_b = st.columns(2)
+                    with col_a:
+                        st.markdown("**Snapshot Range A (Baseline)**")
+                        a_begin = st.selectbox(
+                            "A \u2014 Begin Snap",
+                            snap_ids,
+                            index=0,
+                            key="cmp_a_begin",
+                            format_func=lambda x: snap_labels.get(x, str(x)),
+                        )
+                        a_end = st.selectbox(
+                            "A \u2014 End Snap",
+                            snap_ids,
+                            index=min(1, len(snap_ids) - 1),
+                            key="cmp_a_end",
+                            format_func=lambda x: snap_labels.get(x, str(x)),
+                        )
+                    with col_b:
+                        st.markdown("**Snapshot Range B (Current)**")
+                        b_begin = st.selectbox(
+                            "B \u2014 Begin Snap",
+                            snap_ids,
+                            index=max(0, len(snap_ids) - 2),
+                            key="cmp_b_begin",
+                            format_func=lambda x: snap_labels.get(x, str(x)),
+                        )
+                        b_end = st.selectbox(
+                            "B \u2014 End Snap",
+                            snap_ids,
+                            index=len(snap_ids) - 1,
+                            key="cmp_b_end",
+                            format_func=lambda x: snap_labels.get(x, str(x)),
+                        )
+
+                    if st.button("\U0001f50d Compare Snapshots", key="cmp_ora_btn"):
+                        if a_begin >= a_end:
+                            st.error("Range A: Begin snap must be less than End snap.")
+                        elif b_begin >= b_end:
+                            st.error("Range B: Begin snap must be less than End snap.")
+                        else:
+                            with st.spinner("Comparing snapshots\u2026"):
+                                result = comparator.compare_oracle(
+                                    a_begin, a_end, b_begin, b_end
+                                )
+                            _render_comparison(result)
+
+        elif db_type == DB_TYPE_POSTGRESQL:
+            cmp_mode = st.radio(
+                "Comparison mode",
+                ["pgProfile Sample Ranges", "pg_stat_statements (latest)"],
+                key="cmp_pg_mode",
+                horizontal=True,
+            )
+
+            if cmp_mode == "pgProfile Sample Ranges":
+                analyser_cmp = PerformanceAnalyser(
+                    st.session_state.db_client, st.session_state.llm_client
+                )
+                samp_result = analyser_cmp.list_pgprofile_samples()
+                if "error" in samp_result:
+                    st.error(f"Cannot load pgProfile samples: {samp_result['error']}")
+                else:
+                    samps = samp_result.get("rows", [])
+                    if not samps:
+                        st.info("No pgProfile samples found.")
+                    else:
+                        samp_ids = sorted(
+                            {int(s["sample_id"]) for s in samps if s.get("sample_id")}
+                        )
+                        samp_labels = {
+                            int(s["sample_id"]): (
+                                f"{s['sample_id']} - {s.get('sample_time', '')}"
+                            )
+                            for s in samps
+                            if s.get("sample_id")
+                        }
+
+                        col_a, col_b = st.columns(2)
+                        with col_a:
+                            st.markdown("**Sample Range A (Baseline)**")
+                            sa_begin = st.selectbox(
+                                "A \u2014 Begin Sample",
+                                samp_ids,
+                                index=0,
+                                key="cmp_sa_begin",
+                                format_func=lambda x: samp_labels.get(x, str(x)),
+                            )
+                            sa_end = st.selectbox(
+                                "A \u2014 End Sample",
+                                samp_ids,
+                                index=min(1, len(samp_ids) - 1),
+                                key="cmp_sa_end",
+                                format_func=lambda x: samp_labels.get(x, str(x)),
+                            )
+                        with col_b:
+                            st.markdown("**Sample Range B (Current)**")
+                            sb_begin = st.selectbox(
+                                "B \u2014 Begin Sample",
+                                samp_ids,
+                                index=max(0, len(samp_ids) - 2),
+                                key="cmp_sb_begin",
+                                format_func=lambda x: samp_labels.get(x, str(x)),
+                            )
+                            sb_end = st.selectbox(
+                                "B \u2014 End Sample",
+                                samp_ids,
+                                index=len(samp_ids) - 1,
+                                key="cmp_sb_end",
+                                format_func=lambda x: samp_labels.get(x, str(x)),
+                            )
+
+                        if st.button("\U0001f50d Compare Samples", key="cmp_pg_btn"):
+                            if sa_begin >= sa_end:
+                                st.error("Range A: Begin must be less than End.")
+                            elif sb_begin >= sb_end:
+                                st.error("Range B: Begin must be less than End.")
+                            else:
+                                with st.spinner("Comparing samples\u2026"):
+                                    result = comparator.compare_pgprofile(
+                                        sa_begin, sa_end, sb_begin, sb_end
+                                    )
+                                _render_comparison(result)
+
+            else:
+                st.info(
+                    "pg_stat_statements shows cumulative stats since last "
+                    "reset. For snapshot comparison, use pgProfile sample "
+                    "ranges above.\n\n"
+                    "You can view the current pg_stat_statements data in "
+                    "the **Auto Analyse** tab."
+                )
+
+
 # ---- History tab ----------------------------------------------------------
 with tab_history:
     st.subheader("Query History")
diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index c266d60..7dd305e 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -29,8 +29,13 @@
             executions,
             buffer_gets,
             disk_reads,
-            SUBSTR(sql_text, 1, 200) AS sql_text
+            SUBSTR(sql_fulltext, 1, 500) AS sql_text
         FROM v$sql
+        WHERE parsing_schema_name NOT IN (
+            'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+            'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+            'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+        )
         ORDER BY elapsed_time DESC
     ) WHERE ROWNUM <= 20
 """
@@ -99,13 +104,22 @@
             ROUND(s.elapsed_time / 1e6, 2) AS elapsed_sec,
             s.buffer_gets,
             s.disk_reads,
-            SUBSTR(s.sql_text, 1, 200) AS sql_text
+            SUBSTR(s.sql_fulltext, 1, 500) AS sql_text
         FROM v$sql_plan p
         JOIN v$sql s ON p.sql_id = s.sql_id
             AND p.child_number = s.child_number
         WHERE p.operation = 'TABLE ACCESS'
             AND p.options = 'FULL'
-            AND p.object_owner NOT IN ('SYS', 'SYSTEM', 'DBSNMP', 'OUTLN')
+            AND p.object_owner NOT IN (
+                'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+                'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+                'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+            )
+            AND s.parsing_schema_name NOT IN (
+                'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+                'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+                'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+            )
         ORDER BY s.elapsed_time DESC
     ) WHERE ROWNUM <= 20
 """
@@ -120,9 +134,14 @@
             executions,
             buffer_gets,
             ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec,
-            SUBSTR(sql_text, 1, 200) AS sql_text
+            SUBSTR(sql_fulltext, 1, 500) AS sql_text
         FROM v$sql
         WHERE cpu_time > 0
+          AND parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
         ORDER BY cpu_time DESC
     ) WHERE ROWNUM <= 15
 """
@@ -201,12 +220,17 @@
             SUM(s.executions_delta) AS executions,
             SUM(s.buffer_gets_delta) AS buffer_gets,
             SUM(s.disk_reads_delta) AS disk_reads,
-            DBMS_LOB.SUBSTR(t.sql_text, 200, 1) AS sql_text
+            DBMS_LOB.SUBSTR(t.sql_text, 500, 1) AS sql_text
         FROM dba_hist_sqlstat s
         JOIN dba_hist_sqltext t ON s.sql_id = t.sql_id AND s.dbid = t.dbid
         WHERE s.snap_id BETWEEN :begin_snap AND :end_snap
+          AND s.parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
         GROUP BY s.sql_id, s.plan_hash_value,
-                 DBMS_LOB.SUBSTR(t.sql_text, 200, 1)
+                 DBMS_LOB.SUBSTR(t.sql_text, 500, 1)
         ORDER BY elapsed_sec DESC
     ) WHERE ROWNUM <= 20
 """
@@ -299,7 +323,7 @@
 _PG_TOP_QUERIES = """
     SELECT
         queryid,
-        LEFT(query, 200) AS query_text,
+        LEFT(query, 500) AS query_text,
         calls,
         ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
         ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec,
@@ -314,6 +338,13 @@
             ELSE 100
         END AS cache_hit_pct
     FROM pg_stat_statements
+    WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database())
+      AND queryid IS NOT NULL
+      AND query NOT LIKE 'SET %%'
+      AND query NOT LIKE 'RESET %%'
+      AND query NOT LIKE 'BEGIN%%'
+      AND query NOT LIKE 'COMMIT%%'
+      AND query NOT LIKE 'ROLLBACK%%'
     ORDER BY total_exec_time DESC
     LIMIT 20
 """
@@ -420,7 +451,7 @@
 _PG_TOP_CPU_QUERIES = """
     SELECT
         queryid,
-        LEFT(query, 300) AS query_text,
+        LEFT(query, 500) AS query_text,
         calls,
         ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
         ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec,
@@ -439,6 +470,13 @@
         temp_blks_read,
         temp_blks_written
     FROM pg_stat_statements
+    WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database())
+      AND queryid IS NOT NULL
+      AND query NOT LIKE 'SET %%'
+      AND query NOT LIKE 'RESET %%'
+      AND query NOT LIKE 'BEGIN%%'
+      AND query NOT LIKE 'COMMIT%%'
+      AND query NOT LIKE 'ROLLBACK%%'
     ORDER BY total_exec_time DESC
     LIMIT 15
 """
@@ -447,7 +485,7 @@
     SELECT
         pid,
         usename,
-        LEFT(query, 200) AS query,
+        LEFT(query, 500) AS query,
         wait_event_type,
         wait_event,
         state,
@@ -459,41 +497,345 @@
     LIMIT 20
 """
 
+# ---------------------------------------------------------------------------
+# Oracle best-practice checks
+# ---------------------------------------------------------------------------
+_ORA_ROW_CONTENTION = """
+    SELECT * FROM (
+        SELECT
+            event,
+            total_waits,
+            ROUND(time_waited / 100, 2) AS time_waited_sec,
+            ROUND(average_wait / 100, 4) AS avg_wait_sec
+        FROM v$system_event
+        WHERE event IN (
+            'enq: TX - row lock contention',
+            'enq: TX - index contention',
+            'enq: TX - allocate ITL entry',
+            'enq: TM - contention',
+            'enq: HW - contention',
+            'buffer busy waits',
+            'gc buffer busy acquire',
+            'gc buffer busy release',
+            'row cache lock',
+            'library cache lock',
+            'cursor: pin S wait on X'
+        )
+        ORDER BY time_waited DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_SEQUENCE_NO_CACHE = """
+    SELECT
+        sequence_owner,
+        sequence_name,
+        min_value,
+        max_value,
+        increment_by,
+        cache_size,
+        order_flag,
+        cycle_flag,
+        last_number
+    FROM all_sequences
+    WHERE sequence_owner NOT IN (
+        'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+        'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+        'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+    )
+    AND (cache_size = 0 OR cache_size = 1)
+    ORDER BY sequence_owner, sequence_name
+"""
+
+_ORA_HIGH_ELAPSED_PER_EXEC = """
+    SELECT * FROM (
+        SELECT
+            sql_id,
+            plan_hash_value,
+            executions,
+            ROUND(elapsed_time / GREATEST(executions, 1) / 1e6, 4)
+                AS avg_elapsed_sec,
+            ROUND(elapsed_time / 1e6, 2) AS total_elapsed_sec,
+            buffer_gets,
+            ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec,
+            SUBSTR(sql_fulltext, 1, 500) AS sql_text
+        FROM v$sql
+        WHERE executions > 0
+          AND elapsed_time / GREATEST(executions, 1) / 1e6 > 1
+          AND parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
+        ORDER BY avg_elapsed_sec DESC
+    ) WHERE ROWNUM <= 15
+"""
+
+_ORA_HIGH_EXEC_COUNT = """
+    SELECT * FROM (
+        SELECT
+            sql_id,
+            plan_hash_value,
+            executions,
+            ROUND(elapsed_time / 1e6, 2) AS total_elapsed_sec,
+            ROUND(cpu_time / 1e6, 2) AS total_cpu_sec,
+            buffer_gets,
+            ROUND(buffer_gets / GREATEST(executions, 1)) AS gets_per_exec,
+            SUBSTR(sql_fulltext, 1, 500) AS sql_text
+        FROM v$sql
+        WHERE executions > 1000
+          AND parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
+        ORDER BY executions DESC
+    ) WHERE ROWNUM <= 15
+"""
+
+_ORA_REDO_LOG_SWITCHES = """
+    SELECT * FROM (
+        SELECT
+            TO_CHAR(first_time, 'YYYY-MM-DD HH24') AS switch_hour,
+            COUNT(*) AS switches
+        FROM v$log_history
+        WHERE first_time > SYSDATE - 1
+        GROUP BY TO_CHAR(first_time, 'YYYY-MM-DD HH24')
+        ORDER BY switch_hour DESC
+    ) WHERE ROWNUM <= 24
+"""
+
+_ORA_TEMP_USAGE = """
+    SELECT
+        tablespace_name,
+        ROUND(SUM(bytes_used) / 1048576, 2) AS used_mb,
+        ROUND(SUM(bytes_free) / 1048576, 2) AS free_mb,
+        ROUND(SUM(bytes_used)
+              / NULLIF(SUM(bytes_used) + SUM(bytes_free), 0) * 100, 2) AS pct_used
+    FROM v$temp_space_header
+    GROUP BY tablespace_name
+    ORDER BY pct_used DESC NULLS LAST
+"""
+
+_ORA_PARALLEL_QUERIES = """
+    SELECT * FROM (
+        SELECT
+            sql_id,
+            users_executing,
+            px_servers_executions AS px_servers,
+            ROUND(elapsed_time / 1e6, 2) AS elapsed_sec,
+            SUBSTR(sql_fulltext, 1, 500) AS sql_text
+        FROM v$sql
+        WHERE px_servers_executions > 0
+          AND parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
+        ORDER BY px_servers_executions DESC
+    ) WHERE ROWNUM <= 10
+"""
+
+# ---------------------------------------------------------------------------
+# PostgreSQL best-practice checks
+# ---------------------------------------------------------------------------
+_PG_HIGH_ELAPSED_PER_EXEC = """
+    SELECT
+        queryid,
+        LEFT(query, 500) AS query_text,
+        calls,
+        ROUND((total_exec_time / calls / 1000)::numeric, 4) AS avg_elapsed_sec,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
+        rows,
+        shared_blks_hit,
+        shared_blks_read,
+        temp_blks_read,
+        temp_blks_written
+    FROM pg_stat_statements
+    WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database())
+      AND calls > 0
+      AND total_exec_time / NULLIF(calls, 0) / 1000 > 1
+      AND queryid IS NOT NULL
+      AND query NOT LIKE 'SET %%'
+      AND query NOT LIKE 'RESET %%'
+      AND query NOT LIKE 'BEGIN%%'
+      AND query NOT LIKE 'COMMIT%%'
+      AND query NOT LIKE 'ROLLBACK%%'
+    ORDER BY avg_elapsed_sec DESC
+    LIMIT 15
+"""
+
+_PG_HIGH_EXEC_COUNT = """
+    SELECT
+        queryid,
+        LEFT(query, 500) AS query_text,
+        calls,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec,
+        ROUND((mean_exec_time / 1000)::numeric, 4) AS mean_exec_sec,
+        rows,
+        shared_blks_hit + shared_blks_read AS total_blocks
+    FROM pg_stat_statements
+    WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database())
+      AND calls > 1000
+      AND queryid IS NOT NULL
+      AND query NOT LIKE 'SET %%'
+      AND query NOT LIKE 'RESET %%'
+      AND query NOT LIKE 'BEGIN%%'
+      AND query NOT LIKE 'COMMIT%%'
+      AND query NOT LIKE 'ROLLBACK%%'
+    ORDER BY calls DESC
+    LIMIT 15
+"""
+
+_PG_BLOAT_ESTIMATE = """
+    SELECT
+        schemaname, relname,
+        n_live_tup,
+        n_dead_tup,
+        CASE WHEN n_live_tup > 0
+            THEN ROUND(n_dead_tup::numeric / n_live_tup * 100, 2)
+            ELSE 0
+        END AS dead_pct,
+        pg_relation_size(relid) / 1048576 AS table_size_mb,
+        last_autovacuum::text,
+        last_autoanalyze::text
+    FROM pg_stat_user_tables
+    WHERE n_dead_tup > 10000
+      OR (n_live_tup > 0 AND n_dead_tup::numeric / n_live_tup > 0.2)
+    ORDER BY n_dead_tup DESC
+    LIMIT 20
+"""
+
+_PG_SEQUENCE_CACHE = """
+    SELECT
+        schemaname,
+        sequencename,
+        start_value,
+        min_value,
+        max_value,
+        increment_by,
+        cache_size,
+        cycle
+    FROM pg_sequences
+    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
+      AND (cache_size IS NULL OR cache_size <= 1)
+    ORDER BY schemaname, sequencename
+"""
+
+_PG_TEMP_FILE_USAGE = """
+    SELECT
+        queryid,
+        LEFT(query, 500) AS query_text,
+        calls,
+        temp_blks_read,
+        temp_blks_written,
+        ROUND((temp_blks_read + temp_blks_written) * 8.0 / 1024, 2)
+            AS temp_mb,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS total_exec_sec
+    FROM pg_stat_statements
+    WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database())
+      AND (temp_blks_read > 0 OR temp_blks_written > 0)
+      AND queryid IS NOT NULL
+      AND query NOT LIKE 'SET %%'
+      AND query NOT LIKE 'RESET %%'
+      AND query NOT LIKE 'BEGIN%%'
+    ORDER BY temp_blks_read + temp_blks_written DESC
+    LIMIT 15
+"""
+
+_PG_CONNECTION_STATS = """
+    SELECT
+        state,
+        COUNT(*) AS count,
+        COALESCE(wait_event_type, 'None') AS wait_event_type
+    FROM pg_stat_activity
+    WHERE backend_type = 'client backend'
+    GROUP BY state, wait_event_type
+    ORDER BY count DESC
+"""
+
+_PG_CHECKPOINT_STATS = """
+    SELECT
+        checkpoints_timed,
+        checkpoints_req,
+        buffers_checkpoint,
+        buffers_clean,
+        buffers_backend,
+        maxwritten_clean,
+        ROUND(buffers_backend::numeric /
+              GREATEST(buffers_checkpoint + buffers_clean + buffers_backend, 1)
+              * 100, 2) AS backend_write_pct
+    FROM pg_stat_bgwriter
+"""
+
 ANALYSIS_SYSTEM_PROMPT = (
     "You are a senior DBA and database performance engineer performing a deep-dive "
     "analysis. You have been given detailed performance data including SQL IDs/query IDs, "
-    "execution plans, full table scans, existing indexes, and stats freshness.\n\n"
+    "execution plans, full table scans, existing indexes, stats freshness, row contention "
+    "events, sequence caching issues, and other best-practice metrics.\n\n"
     "Produce the following sections:\n\n"
     "## Executive Summary\n"
     "2-3 sentences summarising the overall database health and biggest concern.\n\n"
-    "## High-CPU / Long-Running SQL\n"
-    "For EACH problematic SQL (reference the sql_id or queryid):\n"
-    "- Quote the sql_id / queryid and a snippet of the SQL text\n"
-    "- Explain WHY it is slow (full table scan, missing index, bad stats, etc.)\n"
+    "## High Elapsed Time SQL\n"
+    "For EACH SQL with high average elapsed time per execution (reference sql_id/queryid):\n"
+    "- Quote the sql_id / queryid, avg elapsed, total elapsed, and a snippet\n"
+    "- Explain WHY it is slow (full table scan, missing index, bad join, bad stats)\n"
     "- Provide the EXACT fix SQL (CREATE INDEX, ANALYZE, rewrite, etc.)\n\n"
+    "## High Execution Count SQL\n"
+    "For SQL executed thousands of times:\n"
+    "- Even small per-execution cost adds up; flag these with sql_id/queryid\n"
+    "- Suggest caching, batching, or query consolidation where applicable\n"
+    "- Provide exact fix SQL if index or rewrite would help\n\n"
     "## Full Table Scans\n"
     "List every table being full-scanned with the sql_id causing it.\n"
     "- For each, check the existing indexes section — if an index already exists "
     "that should have been used, suggest gathering fresh stats or checking predicates.\n"
     "- If no suitable index exists, provide the exact CREATE INDEX statement.\n\n"
+    "## Row Contention & Locking Issues\n"
+    "Analyse the row contention / enqueue wait events data:\n"
+    "- Flag 'enq: TX - row lock contention' and similar events with wait times\n"
+    "- Identify the likely cause (hot blocks, ITL contention, poor sequence caching)\n"
+    "- Provide fixes: increase INITRANS, reduce transaction scope, batch commits\n\n"
+    "## Sequence Caching Issues\n"
+    "For sequences with NOCACHE or CACHE 1:\n"
+    "- Explain the performance impact (row cache lock waits, redo contention)\n"
+    "- Provide exact ALTER SEQUENCE ... CACHE 20 (or higher) statements\n"
+    "- Flag ORDER sequences that may need NOORDER for better performance\n\n"
     "## Missing / Recommended Indexes\n"
     "Based on the query patterns (WHERE, JOIN, ORDER BY columns visible in SQL text), "
     "suggest specific CREATE INDEX statements. Reference the sql_id/queryid that "
     "would benefit.\n\n"
-    "## Stale Statistics / Vacuum Issues\n"
+    "## Stale Statistics / Vacuum / Bloat Issues\n"
     "List tables with stale or missing stats. Provide exact ANALYZE / DBMS_STATS "
-    "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM.\n\n"
+    "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM "
+    "and estimate bloat. For Oracle, flag tables not analysed in 7+ days.\n\n"
+    "## Temp Space / Sort Issues\n"
+    "Flag queries spilling to temp (temp_blks_read/written for PG, sorts (disk) for "
+    "Oracle). Suggest work_mem increase, index to avoid sort, or query rewrite.\n\n"
     "## Unused Indexes\n"
     "List indexes that have never been scanned and recommend dropping them "
     "(provide DROP INDEX statements).\n\n"
+    "## Checkpoint / Redo / WAL Issues\n"
+    "For Oracle: flag excessive redo log switches (>6/hour). "
+    "For PostgreSQL: flag high backend_write_pct (buffers_backend vs checkpoint). "
+    "Suggest redo log sizing or checkpoint_completion_target tuning.\n\n"
     "## Action Plan (Priority Order)\n"
     "Numbered list of actions sorted by impact. Each action must include:\n"
-    "- The specific sql_id / queryid / table affected\n"
+    "- The specific sql_id / queryid / table / sequence affected\n"
     "- The exact SQL command to execute\n"
     "- Expected improvement\n\n"
-    "IMPORTANT: Be SPECIFIC — always reference sql_id, queryid, or table name. "
-    "Never give generic advice like 'add indexes where needed'. "
-    "Use markdown formatting with code blocks for SQL."
+    "IMPORTANT RULES:\n"
+    "1. Be SPECIFIC — always reference sql_id, queryid, or table name.\n"
+    "2. Always QUOTE the full SQL text provided in the data alongside the sql_id/queryid. "
+    "Show the complete query text so the reader can understand exactly which SQL is problematic.\n"
+    "3. Never give generic advice like 'add indexes where needed'.\n"
+    "4. Exclude all system/internal queries — focus only on user application SQL.\n"
+    "5. Use markdown formatting with code blocks for SQL.\n"
+    "6. For each problematic SQL, show it in a code block like:\n"
+    "   ```sql\n"
+    "   -- sql_id: ABC123\n"
+    "   SELECT ... (full query text from the data)\n"
+    "   ```\n"
+    "7. Then explain the issue and provide the fix SQL in another code block."
 )
 
 
@@ -608,13 +950,20 @@ def _collect_oracle(self) -> dict[str, Any]:
         queries = {
             "top_cpu_sql": _ORA_TOP_CPU_SQL,
             "top_elapsed_sql": _ORA_TOP_SQL,
+            "high_elapsed_per_exec": _ORA_HIGH_ELAPSED_PER_EXEC,
+            "high_execution_count": _ORA_HIGH_EXEC_COUNT,
             "full_table_scans": _ORA_FULL_TABLE_SCANS,
             "existing_indexes": _ORA_EXISTING_INDEXES,
             "stale_statistics": _ORA_STALE_STATS,
+            "row_contention": _ORA_ROW_CONTENTION,
+            "sequence_no_cache": _ORA_SEQUENCE_NO_CACHE,
             "wait_events": _ORA_WAIT_EVENTS,
             "system_stats": _ORA_SYS_STATS,
             "sga_info": _ORA_SGA,
             "tablespace_io": _ORA_TABLESPACE_IO,
+            "redo_log_switches": _ORA_REDO_LOG_SWITCHES,
+            "temp_usage": _ORA_TEMP_USAGE,
+            "parallel_queries": _ORA_PARALLEL_QUERIES,
         }
         for name, sql in queries.items():
             result = self.db_client.execute_query(sql)
@@ -705,6 +1054,8 @@ def _collect_postgresql(self) -> dict[str, Any]:
         queries = {
             "top_cpu_queries": _PG_TOP_CPU_QUERIES,
             "top_queries": _PG_TOP_QUERIES,
+            "high_elapsed_per_exec": _PG_HIGH_ELAPSED_PER_EXEC,
+            "high_execution_count": _PG_HIGH_EXEC_COUNT,
             "seq_scan_tables": _PG_SEQ_SCAN_TABLES,
             "existing_indexes": _PG_EXISTING_INDEXES,
             "stale_stats_vacuum": _PG_STALE_STATS,
@@ -713,6 +1064,11 @@ def _collect_postgresql(self) -> dict[str, Any]:
             "bgwriter_stats": _PG_BGWRITER,
             "unused_indexes": _PG_UNUSED_INDEXES,
             "lock_waits": _PG_LOCK_WAITS,
+            "bloat_estimate": _PG_BLOAT_ESTIMATE,
+            "sequence_cache_issues": _PG_SEQUENCE_CACHE,
+            "temp_file_usage": _PG_TEMP_FILE_USAGE,
+            "connection_stats": _PG_CONNECTION_STATS,
+            "checkpoint_stats": _PG_CHECKPOINT_STATS,
         }
         for name, sql in queries.items():
             result = self.db_client.execute_query(sql)
diff --git a/tools/pg-assistant/requirements.txt b/tools/pg-assistant/requirements.txt
index 8efff7e..3732284 100644
--- a/tools/pg-assistant/requirements.txt
+++ b/tools/pg-assistant/requirements.txt
@@ -3,3 +3,4 @@ psycopg2-binary>=2.9.0,<3.0.0
 oracledb>=2.0.0,<3.0.0
 streamlit>=1.28.0,<2.0.0
 pandas>=2.0.0,<3.0.0
+plotly>=5.0.0,<6.0.0
diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py
new file mode 100644
index 0000000..518f7fc
--- /dev/null
+++ b/tools/pg-assistant/snapshot_compare.py
@@ -0,0 +1,889 @@
+"""Compare two database snapshots with visual charts.
+
+Supports Oracle AWR snap-ID ranges and PostgreSQL pgProfile sample-ID ranges.
+Produces Plotly figures for side-by-side comparison of key metrics.
+"""
+
+import logging
+from typing import Any
+
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+
+from db_client import BaseDBClient, DB_TYPE_ORACLE
+from llm_client import LLMClient
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Oracle AWR delta queries (filled in via str.format: {begin_snap} / {end_snap})
+# ---------------------------------------------------------------------------
+_ORA_SNAP_TOP_SQL = """
+    SELECT * FROM (
+        SELECT
+            s.sql_id,
+            SUM(s.elapsed_time_delta) / 1e6 AS elapsed_sec,
+            SUM(s.cpu_time_delta) / 1e6 AS cpu_sec,
+            SUM(s.executions_delta) AS executions,
+            SUM(s.buffer_gets_delta) AS buffer_gets,
+            SUM(s.disk_reads_delta) AS disk_reads,
+            SUM(s.rows_processed_delta) AS rows_processed
+        FROM dba_hist_sqlstat s
+        WHERE s.snap_id BETWEEN {begin_snap} AND {end_snap}
+          AND s.parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
+        GROUP BY s.sql_id
+        ORDER BY elapsed_sec DESC
+    ) WHERE ROWNUM <= 20
+"""
+
+_ORA_SNAP_WAIT_EVENTS = """
+    SELECT * FROM (
+        SELECT
+            event_name AS event,
+            wait_class,
+            SUM(total_waits_fg) AS total_waits,
+            ROUND(SUM(time_waited_micro_fg) / 1e6, 2) AS time_waited_sec
+        FROM dba_hist_system_event
+        WHERE snap_id BETWEEN {begin_snap} AND {end_snap}
+          AND wait_class != 'Idle'
+        GROUP BY event_name, wait_class
+        ORDER BY time_waited_sec DESC
+    ) WHERE ROWNUM <= 15
+"""
+
+_ORA_SNAP_SYS_STATS = """
+    SELECT
+        stat_name AS name,
+        SUM(value) AS value
+    FROM dba_hist_sysstat
+    WHERE snap_id BETWEEN {begin_snap} AND {end_snap}
+      AND stat_name IN (
+        'db block gets', 'consistent gets', 'physical reads',
+        'redo size', 'sorts (memory)', 'sorts (disk)',
+        'rows processed', 'parse count (total)', 'parse count (hard)',
+        'execute count', 'user commits', 'user rollbacks',
+        'enqueue waits', 'enqueue timeouts'
+    )
+    GROUP BY stat_name
+    ORDER BY stat_name
+"""
+
+_ORA_SNAP_TOP_ELAPSED = """
+    SELECT * FROM (
+        SELECT
+            s.sql_id,
+            ROUND(SUM(s.elapsed_time_delta) / GREATEST(SUM(s.executions_delta), 1) / 1e6, 4)
+                AS avg_elapsed_sec,
+            SUM(s.executions_delta) AS executions,
+            SUM(s.buffer_gets_delta) AS buffer_gets
+        FROM dba_hist_sqlstat s
+        WHERE s.snap_id BETWEEN {begin_snap} AND {end_snap}
+          AND s.parsing_schema_name NOT IN (
+              'SYS','SYSTEM','DBSNMP','OUTLN','XDB','WMSYS',
+              'CTXSYS','MDSYS','ORDSYS','ORDDATA','LBACSYS',
+              'APEX_PUBLIC_USER','FLOWS_FILES','DVSYS','AUDSYS'
+          )
+        GROUP BY s.sql_id
+        HAVING SUM(s.executions_delta) > 0
+        ORDER BY avg_elapsed_sec DESC
+    ) WHERE ROWNUM <= 15
+"""
+
+# ---------------------------------------------------------------------------
+# PostgreSQL pgProfile delta queries (parameterised with {begin_sample}/{end_sample})
+# ---------------------------------------------------------------------------
+_PG_SNAP_TOP_SQL = """
+    SELECT
+        sl.queryid::text AS queryid,
+        SUM(ss.exec_time) / 1000.0 AS elapsed_sec,
+        SUM(ss.calls) AS executions,
+        SUM(ss.shared_blks_hit) AS shared_blks_hit,
+        SUM(ss.shared_blks_read) AS shared_blks_read,
+        SUM(ss.rows) AS rows_processed
+    FROM profile.stmt_list sl
+    JOIN profile.sample_statements ss ON sl.queryid_md5 = ss.queryid_md5
+    WHERE ss.sample_id BETWEEN {begin_sample} AND {end_sample}
+    GROUP BY sl.queryid
+    ORDER BY elapsed_sec DESC
+    LIMIT 20
+"""
+
+_PG_SNAP_WAIT_EVENTS = """
+    SELECT
+        event_type,
+        event,
+        SUM(tot_waited)::numeric AS time_waited_sec,
+        SUM(tot_waits) AS total_waits
+    FROM profile.wait_sampling_total
+    WHERE sample_id BETWEEN {begin_sample} AND {end_sample}
+    GROUP BY event_type, event
+    ORDER BY time_waited_sec DESC
+    LIMIT 15
+"""
+
+# PostgreSQL pg_stat_statements cumulative (no snap range - latest snapshot)
+_PG_STAT_TOP_SQL = """
+    SELECT
+        queryid::text AS queryid,
+        LEFT(query, 120) AS query_text,
+        ROUND((total_exec_time / 1000)::numeric, 2) AS elapsed_sec,
+        calls AS executions,
+        shared_blks_hit,
+        shared_blks_read,
+        rows AS rows_processed,
+        ROUND((mean_exec_time / 1000)::numeric, 4) AS avg_elapsed_sec
+    FROM pg_stat_statements
+    WHERE dbid = (SELECT oid FROM pg_database WHERE datname = current_database())
+      AND queryid IS NOT NULL
+      AND query NOT LIKE 'SET %%'
+      AND query NOT LIKE 'RESET %%'
+      AND query NOT LIKE 'BEGIN%%'
+      AND query NOT LIKE 'COMMIT%%'
+      AND query NOT LIKE 'ROLLBACK%%'
+    ORDER BY total_exec_time DESC
+    LIMIT 20
+"""
+
+_PG_DB_STATS = """
+    SELECT
+        xact_commit, xact_rollback,
+        blks_read, blks_hit,
+        tup_returned, tup_fetched,
+        tup_inserted, tup_updated, tup_deleted,
+        temp_files, temp_bytes
+    FROM pg_stat_database
+    WHERE datname = current_database()
+"""
+
+
+# ---------------------------------------------------------------------------
+# Comparison engine
+# ---------------------------------------------------------------------------
+class SnapshotComparator:
+    """Compare two snapshot ranges and produce delta metrics + Plotly charts."""
+
+    def __init__(self, db_client: BaseDBClient, llm_client: LLMClient) -> None:
+        """Store the DB and LLM clients and record whether the target is Oracle."""
+        self.db, self.llm = db_client, llm_client
+        self.is_oracle = db_client.db_type == DB_TYPE_ORACLE
+
+    # -- public API ----------------------------------------------------------
+
+    def compare_oracle(
+        self,
+        snap_a_begin: int,
+        snap_a_end: int,
+        snap_b_begin: int,
+        snap_b_end: int,
+    ) -> dict[str, Any]:
+        """Collect both AWR snap-ID ranges (A, then B) and build their comparison."""
+        ranges = ((snap_a_begin, snap_a_end), (snap_b_begin, snap_b_end))
+        collected = [self._collect_oracle_snap(lo, hi) for lo, hi in ranges]
+        labels = [f"Snap {lo}\u2013{hi}" for lo, hi in ranges]
+        # Positional order expected downstream: data A, data B, label A, label B.
+        return self._build_comparison(*collected, *labels)
+
+    def compare_pgprofile(
+        self,
+        samp_a_begin: int,
+        samp_a_end: int,
+        samp_b_begin: int,
+        samp_b_end: int,
+    ) -> dict[str, Any]:
+        """Collect both pgProfile sample-ID ranges (A, then B) and compare them."""
+        ranges = ((samp_a_begin, samp_a_end), (samp_b_begin, samp_b_end))
+        collected = [self._collect_pg_snap(lo, hi) for lo, hi in ranges]
+        labels = [f"Sample {lo}\u2013{hi}" for lo, hi in ranges]
+        # Positional order expected downstream: data A, data B, label A, label B.
+        return self._build_comparison(*collected, *labels)
+
+    # -- data collection -----------------------------------------------------
+
+    def _run_query(self, sql: str) -> list[dict[str, Any]]:
+        outcome = self.db.execute_query(sql)
+        if "error" not in outcome:
+            return outcome.get("rows", [])  # a result without "rows" counts as empty
+        logger.warning("Query error: %s", outcome["error"])
+        return []  # a reported error degrades to "no rows" instead of raising
+
+    def _collect_oracle_snap(self, begin: int, end: int) -> dict[str, Any]:
+        """Run the four AWR templates for one snap range; IDs are forced to int."""
+        fmt = {"begin_snap": str(int(begin)), "end_snap": str(int(end))}
+        templates = {
+            "top_sql": _ORA_SNAP_TOP_SQL, "top_elapsed": _ORA_SNAP_TOP_ELAPSED,
+            "wait_events": _ORA_SNAP_WAIT_EVENTS, "sys_stats": _ORA_SNAP_SYS_STATS,
+        }
+        data = {k: self._run_query(t.format(**fmt)) for k, t in templates.items()}
+        return {**data, "snap_range": f"{begin}-{end}"}
+
+    def _collect_pg_snap(self, begin: int, end: int) -> dict[str, Any]:
+        """Run the pgProfile templates for one sample range (IDs forced to int)."""
+        # int() rejects anything that is not a plain integer before it reaches SQL.
+        fmt = {"begin_sample": str(int(begin)), "end_sample": str(int(end))}
+        templates = {"top_sql": _PG_SNAP_TOP_SQL, "wait_events": _PG_SNAP_WAIT_EVENTS}
+        data = {k: self._run_query(t.format(**fmt)) for k, t in templates.items()}
+        return {**data, "snap_range": f"{begin}-{end}"}
+
+    # -- comparison logic ----------------------------------------------------
+
+    def _build_comparison(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> dict[str, Any]:
+        """Assemble charts, delta table and LLM analysis for two snapshots.
+
+        Each chart builder receives the same four arguments; a builder whose
+        result is falsy (``None`` = nothing to plot) is left out of ``figures``.
+        The two Oracle-only charts are attempted only when ``self.is_oracle``.
+
+        The delta table is built before the LLM call because its rows are part
+        of the prompt text.
+
+        Args:
+            data_a: Collected rows for snapshot range A.
+            data_b: Collected rows for snapshot range B.
+            label_a: Display label for range A.
+            label_b: Display label for range B.
+
+        Returns:
+            Dict with ``figures`` (list of ``{"title", "fig"}`` dicts),
+            ``delta_table``, ``analysis`` and the four inputs under their own
+            names (``data_a``, ``data_b``, ``label_a``, ``label_b``).
+        """
+        # (title shown with the chart, builder method), in output order.
+        chart_builders = [
+            ("Top SQL by Elapsed Time", self._chart_top_sql_elapsed),
+            ("Top SQL by Executions", self._chart_top_sql_executions),
+            ("Top SQL by Buffer Gets", self._chart_top_sql_buffer_gets),
+            ("Wait Events Comparison", self._chart_wait_events),
+            ("Wait Time Distribution", self._chart_wait_pie),
+        ]
+        if self.is_oracle:
+            # Only the Oracle collection provides "sys_stats" and "top_elapsed".
+            chart_builders += [
+                ("System Statistics Delta", self._chart_sys_stats),
+                ("Avg Elapsed per Execution (Top SQL)", self._chart_avg_elapsed),
+            ]
+
+        figures: list[dict[str, Any]] = []
+        for title, build_chart in chart_builders:
+            chart = build_chart(data_a, data_b, label_a, label_b)
+            if chart:
+                figures.append({"title": title, "fig": chart})
+
+        # Build delta summary table
+        delta_table = self._build_delta_table(data_a, data_b, label_a, label_b)
+
+        # The LLM sees the delta table plus the top rows of both snapshots.
+        prompt_text = self._format_comparison_text(
+            data_a, data_b, label_a, label_b, delta_table
+        )
+        analysis = self._get_llm_comparison(prompt_text)
+
+        return {
+            "figures": figures,
+            "delta_table": delta_table,
+            "data_a": data_a,
+            "data_b": data_b,
+            "label_a": label_a,
+            "label_b": label_b,
+            "analysis": analysis,
+        }
+
+    # -- chart builders ------------------------------------------------------
+
+    def _chart_top_sql_elapsed(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Grouped bar chart of elapsed seconds per statement, A next to B.
+
+        Reads the first 10 ``top_sql`` rows of each snapshot and plots at most
+        12 distinct IDs (snapshot A's first).  None metrics count as 0.
+        Returns ``None`` when neither snapshot yields a usable ID.
+        """
+        sql_a = data_a.get("top_sql", [])
+        sql_b = data_b.get("top_sql", [])
+        if not sql_a and not sql_b:
+            return None
+
+        id_key = "sql_id" if self.is_oracle else "queryid"
+        all_ids: list[str] = []
+        map_a: dict[str, float] = {}
+        map_b: dict[str, float] = {}
+        for row in sql_a[:10]:
+            sid = str(row.get(id_key, ""))
+            if sid:
+                all_ids.append(sid)
+                # "or 0": a None value (e.g. SQL NULL) would make float() raise.
+                map_a[sid] = float(row.get("elapsed_sec") or 0)
+        for row in sql_b[:10]:
+            sid = str(row.get(id_key, ""))
+            if sid and sid not in all_ids:
+                all_ids.append(sid)
+            map_b[sid] = float(row.get("elapsed_sec") or 0)
+        if not all_ids:
+            return None
+
+        ids = all_ids[:12]
+        short_ids = [s[:13] for s in ids]
+        fig = go.Figure()
+        for label, values, colour in (
+            (label_a, map_a, "#636EFA"),
+            (label_b, map_b, "#EF553B"),
+        ):
+            fig.add_trace(
+                go.Bar(
+                    name=label,
+                    x=short_ids,
+                    y=[values.get(i, 0) for i in ids],
+                    marker_color=colour,
+                )
+            )
+        fig.update_layout(
+            barmode="group",
+            title="Top SQL \u2014 Elapsed Time (seconds)",
+            xaxis_title="SQL ID" if self.is_oracle else "Query ID",
+            yaxis_title="Elapsed (sec)",
+            height=420,
+            legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        )
+        return fig
+
+    def _chart_top_sql_executions(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Grouped bar chart of execution counts per statement, A next to B.
+
+        Reads the first 10 ``top_sql`` rows of each snapshot and plots at most
+        12 distinct IDs (snapshot A's first).  None metrics count as 0.
+        Returns ``None`` when neither snapshot yields a usable ID.
+        """
+        sql_a = data_a.get("top_sql", [])
+        sql_b = data_b.get("top_sql", [])
+        if not sql_a and not sql_b:
+            return None
+
+        id_key = "sql_id" if self.is_oracle else "queryid"
+        all_ids: list[str] = []
+        map_a: dict[str, float] = {}
+        map_b: dict[str, float] = {}
+        for row in sql_a[:10]:
+            sid = str(row.get(id_key, ""))
+            if sid:
+                all_ids.append(sid)
+                # "or 0": a None value (e.g. SQL NULL) would make float() raise.
+                map_a[sid] = float(row.get("executions") or 0)
+        for row in sql_b[:10]:
+            sid = str(row.get(id_key, ""))
+            if sid and sid not in all_ids:
+                all_ids.append(sid)
+            map_b[sid] = float(row.get("executions") or 0)
+        if not all_ids:
+            return None
+
+        ids = all_ids[:12]
+        short_ids = [s[:13] for s in ids]
+        fig = go.Figure()
+        for label, values, colour in (
+            (label_a, map_a, "#636EFA"),
+            (label_b, map_b, "#EF553B"),
+        ):
+            fig.add_trace(
+                go.Bar(
+                    name=label,
+                    x=short_ids,
+                    y=[values.get(i, 0) for i in ids],
+                    marker_color=colour,
+                )
+            )
+        fig.update_layout(
+            barmode="group",
+            title="Top SQL \u2014 Executions",
+            xaxis_title="SQL ID" if self.is_oracle else "Query ID",
+            yaxis_title="Executions",
+            height=420,
+            legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        )
+        return fig
+
+    def _chart_top_sql_buffer_gets(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Grouped bar chart of buffer gets (Oracle) or shared blocks hit per SQL.
+
+        Reads the first 10 ``top_sql`` rows per snapshot, plots at most 12 distinct
+        IDs and treats None metrics as 0; returns ``None`` if there is no usable ID.
+        """
+        sql_a = data_a.get("top_sql", [])
+        sql_b = data_b.get("top_sql", [])
+        if not sql_a and not sql_b:
+            return None
+
+        id_key = "sql_id" if self.is_oracle else "queryid"
+        buf_key = "buffer_gets" if self.is_oracle else "shared_blks_hit"
+        metric_label = "Buffer Gets" if self.is_oracle else "Shared Blocks Hit"
+        all_ids: list[str] = []
+        map_a: dict[str, float] = {}
+        map_b: dict[str, float] = {}
+        for row in sql_a[:10]:
+            sid = str(row.get(id_key, ""))
+            if sid:
+                all_ids.append(sid)
+                # "or 0": a None value (e.g. SQL NULL) would make float() raise.
+                map_a[sid] = float(row.get(buf_key) or 0)
+        for row in sql_b[:10]:
+            sid = str(row.get(id_key, ""))
+            if sid and sid not in all_ids:
+                all_ids.append(sid)
+            map_b[sid] = float(row.get(buf_key) or 0)
+        if not all_ids:
+            return None
+
+        ids = all_ids[:12]
+        short_ids = [s[:13] for s in ids]
+        fig = go.Figure()
+        for label, values, colour in (
+            (label_a, map_a, "#636EFA"),
+            (label_b, map_b, "#EF553B"),
+        ):
+            fig.add_trace(
+                go.Bar(
+                    name=label,
+                    x=short_ids,
+                    y=[values.get(i, 0) for i in ids],
+                    marker_color=colour,
+                )
+            )
+        fig.update_layout(
+            barmode="group",
+            title=f"Top SQL \u2014 {metric_label}",
+            xaxis_title="SQL ID" if self.is_oracle else "Query ID",
+            yaxis_title=metric_label,
+            height=420,
+            legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        )
+        return fig
+
+    def _chart_wait_events(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Grouped bar chart of seconds waited per wait event, A next to B.
+
+        Reads the first 10 ``wait_events`` rows of each snapshot and plots at
+        most 12 distinct events (snapshot A's first).  None times count as 0.
+        Returns ``None`` when neither snapshot yields a named event.
+        """
+        wa = data_a.get("wait_events", [])
+        wb = data_b.get("wait_events", [])
+        if not wa and not wb:
+            return None
+
+        all_events: list[str] = []
+        map_a: dict[str, float] = {}
+        map_b: dict[str, float] = {}
+        for row in wa[:10]:
+            evt = str(row.get("event", ""))
+            if evt:
+                all_events.append(evt)
+                # "or 0": a None value (e.g. SQL NULL) would make float() raise.
+                map_a[evt] = float(row.get("time_waited_sec") or 0)
+        for row in wb[:10]:
+            evt = str(row.get("event", ""))
+            if evt and evt not in all_events:
+                all_events.append(evt)
+            map_b[evt] = float(row.get("time_waited_sec") or 0)
+        if not all_events:
+            return None
+
+        events = all_events[:12]
+        short_events = [e[:30] for e in events]
+        fig = go.Figure()
+        for label, values, colour in (
+            (label_a, map_a, "#636EFA"),
+            (label_b, map_b, "#EF553B"),
+        ):
+            fig.add_trace(
+                go.Bar(
+                    name=label,
+                    x=short_events,
+                    y=[values.get(e, 0) for e in events],
+                    marker_color=colour,
+                )
+            )
+        fig.update_layout(
+            barmode="group",
+            title="Wait Events \u2014 Time Waited (seconds)",
+            xaxis_title="Event",
+            yaxis_title="Time Waited (sec)",
+            height=420,
+            legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        )
+        return fig
+
+    def _chart_wait_pie(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Side-by-side donut charts of wait time per wait class / event type.
+
+        Wait time is summed per ``wait_class`` (Oracle) or ``event_type``;
+        rows without a class fall under "Other" and None times count as 0.
+        Returns ``None`` when neither snapshot has wait-event rows.
+        """
+        wa = data_a.get("wait_events", [])
+        wb = data_b.get("wait_events", [])
+        if not wa and not wb:
+            return None
+
+        class_key = "wait_class" if self.is_oracle else "event_type"
+
+        def aggregate_by_class(rows: list[dict]) -> tuple[list[str], list[float]]:
+            agg: dict[str, float] = {}
+            for row in rows:
+                cls = str(row.get(class_key) or "Other")
+                agg[cls] = agg.get(cls, 0) + float(row.get("time_waited_sec") or 0)
+            return list(agg), list(agg.values())
+
+        labels_a, values_a = aggregate_by_class(wa)
+        labels_b, values_b = aggregate_by_class(wb)
+        if not values_a and not values_b:
+            return None
+
+        fig = make_subplots(
+            rows=1,
+            cols=2,
+            specs=[[{"type": "pie"}, {"type": "pie"}]],
+            subplot_titles=[label_a, label_b],
+        )
+        # One pie per snapshot; a side without data gets no trace in its subplot.
+        sides = ((label_a, labels_a, values_a), (label_b, labels_b, values_b))
+        for col, (name, labels, values) in enumerate(sides, start=1):
+            if values:
+                fig.add_trace(
+                    go.Pie(labels=labels, values=values, hole=0.35, name=name),
+                    row=1,
+                    col=col,
+                )
+        fig.update_layout(
+            title="Wait Time Distribution by Class",
+            height=400,
+        )
+        return fig
+
+    def _chart_sys_stats(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Grouped bar chart of every ``sys_stats`` value, A next to B.
+
+        Statistics are listed in snapshot A's order, then those only in B.
+        None values count as 0; returns ``None`` when no statistic is named.
+        """
+        sa = data_a.get("sys_stats", [])
+        sb = data_b.get("sys_stats", [])
+        if not sa and not sb:
+            return None
+
+        map_a: dict[str, float] = {}
+        map_b: dict[str, float] = {}
+        all_names: list[str] = []
+        for row in sa:
+            name = str(row.get("name", ""))
+            if name:
+                all_names.append(name)
+                # "or 0": a None value (e.g. SQL NULL) would make float() raise.
+                map_a[name] = float(row.get("value") or 0)
+        for row in sb:
+            name = str(row.get("name", ""))
+            if name and name not in all_names:
+                all_names.append(name)
+            map_b[name] = float(row.get("value") or 0)
+        if not all_names:
+            return None
+
+        fig = go.Figure()
+        for label, values, colour in (
+            (label_a, map_a, "#636EFA"),
+            (label_b, map_b, "#EF553B"),
+        ):
+            fig.add_trace(
+                go.Bar(
+                    name=label,
+                    x=all_names,
+                    y=[values.get(n, 0) for n in all_names],
+                    marker_color=colour,
+                )
+            )
+        fig.update_layout(
+            barmode="group",
+            title="System Statistics Comparison",
+            xaxis_title="Statistic",
+            yaxis_title="Value",
+            height=450,
+            xaxis_tickangle=-35,
+            legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        )
+        return fig
+
+    def _chart_avg_elapsed(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> go.Figure | None:
+        """Grouped bar chart of average elapsed seconds per execution, A next to B.
+
+        Reads the first 10 ``top_elapsed`` rows of each snapshot and plots at
+        most 12 distinct ``sql_id`` values (snapshot A's first).  None averages
+        count as 0.  Returns ``None`` when neither snapshot yields a usable ID.
+        """
+        ea = data_a.get("top_elapsed", [])
+        eb = data_b.get("top_elapsed", [])
+        if not ea and not eb:
+            return None
+
+        all_ids: list[str] = []
+        map_a: dict[str, float] = {}
+        map_b: dict[str, float] = {}
+        for row in ea[:10]:
+            sid = str(row.get("sql_id", ""))
+            if sid:
+                all_ids.append(sid)
+                # "or 0": a None value (e.g. SQL NULL) would make float() raise.
+                map_a[sid] = float(row.get("avg_elapsed_sec") or 0)
+        for row in eb[:10]:
+            sid = str(row.get("sql_id", ""))
+            if sid and sid not in all_ids:
+                all_ids.append(sid)
+            map_b[sid] = float(row.get("avg_elapsed_sec") or 0)
+        if not all_ids:
+            return None
+
+        ids = all_ids[:12]
+        short_ids = [s[:13] for s in ids]
+        fig = go.Figure()
+        for label, values, colour in (
+            (label_a, map_a, "#636EFA"),
+            (label_b, map_b, "#EF553B"),
+        ):
+            fig.add_trace(
+                go.Bar(
+                    name=label,
+                    x=short_ids,
+                    y=[values.get(i, 0) for i in ids],
+                    marker_color=colour,
+                )
+            )
+        fig.update_layout(
+            barmode="group",
+            title="Avg Elapsed per Execution (seconds)",
+            xaxis_title="SQL ID",
+            yaxis_title="Avg Elapsed (sec)",
+            height=420,
+            legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        )
+        return fig
+
+    # -- delta summary table -------------------------------------------------
+
+    def _build_delta_table(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+    ) -> list[dict[str, Any]]:
+        """Build a summary table of key metric deltas between snapshots.
+
+        Five totals are always reported, in this order: elapsed time,
+        executions, wait time, buffer gets / shared blocks hit and disk reads /
+        shared blocks read.  On Oracle, selected system statistics follow, but
+        only those that are non-zero in at least one snapshot.
+
+        None metric values in a row are counted as 0 instead of raising
+        ``TypeError`` from ``float(None)``.
+
+        Args:
+            data_a: Collected rows for snapshot A.
+            data_b: Collected rows for snapshot B.
+            label_a: Label of snapshot A, used as a column key in each row.
+            label_b: Label of snapshot B, used as a column key in each row.
+
+        Returns:
+            One ``_delta_row`` dict per reported metric.
+        """
+
+        def total(data: dict[str, Any], section: str, key: str) -> float:
+            # "or 0" maps None values and missing keys to zero.
+            return sum(float(r.get(key) or 0) for r in data.get(section, []))
+
+        def sys_stats(data: dict[str, Any]) -> dict[str, float]:
+            return {
+                str(r.get("name", "")): float(r.get("value") or 0)
+                for r in data.get("sys_stats", [])
+            }
+
+        # Column names differ between the Oracle and PostgreSQL top-SQL rows.
+        if self.is_oracle:
+            buf_key, buf_label = "buffer_gets", "Buffer Gets"
+            disk_key, disk_label = "disk_reads", "Disk Reads"
+        else:
+            buf_key, buf_label = "shared_blks_hit", "Shared Blocks Hit"
+            disk_key, disk_label = "shared_blks_read", "Shared Blocks Read"
+
+        # (metric title, section of the snapshot data, column to sum)
+        totals = [
+            ("Total Top SQL Elapsed (sec)", "top_sql", "elapsed_sec"),
+            ("Total Top SQL Executions", "top_sql", "executions"),
+            ("Total Wait Time (sec)", "wait_events", "time_waited_sec"),
+            (f"Total {buf_label}", "top_sql", buf_key),
+            (f"Total {disk_label}", "top_sql", disk_key),
+        ]
+        rows: list[dict[str, Any]] = [
+            self._delta_row(
+                title,
+                total(data_a, section, key),
+                total(data_b, section, key),
+                label_a,
+                label_b,
+            )
+            for title, section, key in totals
+        ]
+
+        # Oracle-specific system stats
+        if self.is_oracle:
+            stats_a = sys_stats(data_a)
+            stats_b = sys_stats(data_b)
+            for stat_name in (
+                "physical reads",
+                "parse count (hard)",
+                "execute count",
+                "user commits",
+                "enqueue waits",
+            ):
+                va = stats_a.get(stat_name, 0)
+                vb = stats_b.get(stat_name, 0)
+                if va or vb:
+                    rows.append(
+                        self._delta_row(stat_name.title(), va, vb, label_a, label_b)
+                    )
+        return rows
+
+    @staticmethod
+    def _delta_row(
+        metric: str, val_a: float, val_b: float, label_a: str, label_b: str
+    ) -> dict[str, Any]:
+        """Build one delta row; the % is "n/a" for a non-zero change from zero."""
+        delta = val_b - val_a
+        direction = "+" if delta > 0 else ("-" if delta < 0 else "=")
+        pct = f"{abs(delta / val_a * 100):.1f}%" if val_a else ("n/a" if delta else "0.0%")
+        return {
+            "metric": metric,
+            label_a: round(val_a, 2), label_b: round(val_b, 2),
+            "delta": round(delta, 2),
+            "change_pct": f"{direction}{pct}",
+        }
+
+    # -- LLM comparison analysis ---------------------------------------------
+
+    def _format_comparison_text(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+        delta_table: list[dict[str, Any]],
+    ) -> str:
+        """Render the comparison as the plain-text prompt given to the LLM.
+
+        Layout: report header, one line per ``delta_table`` row, then up to
+        10 ``top_sql`` rows and up to 10 ``wait_events`` rows for each snapshot.
+        Row dicts are flattened with ``_fmt``.
+        """
+        lines = [
+            f"SNAPSHOT COMPARISON REPORT\n{'=' * 60}",
+            f"Snapshot A: {label_a}",
+            f"Snapshot B: {label_b}\n",
+            "--- DELTA SUMMARY ---",
+        ]
+        lines.extend(
+            f"  {entry['metric']}: {entry[label_a]} -> {entry[label_b]} "
+            f"(delta={entry['delta']}, {entry['change_pct']})"
+            for entry in delta_table
+        )
+
+        # Both snapshots are listed per section: top SQL first, wait events after.
+        sections = (("TOP SQL", "top_sql"), ("WAIT EVENTS", "wait_events"))
+        for heading, key in sections:
+            for side, data in (("A", data_a), ("B", data_b)):
+                lines.append(f"\n--- SNAPSHOT {side}: {heading} ---")
+                lines.extend(
+                    f"  [{rank}] {_fmt(record)}"
+                    for rank, record in enumerate(data.get(key, [])[:10], 1)
+                )
+
+        return "\n".join(lines)
+
+    def _get_llm_comparison(self, text: str) -> str:
+        """Ask the LLM for the narrative report; return an error message on failure."""
+        instructions = (
+            "You are a senior DBA comparing two database performance snapshots. "
+            "Produce a detailed comparison report with these sections:\n\n"
+            "## Executive Summary\n"
+            "2-3 sentences on overall change in database health between the two periods.\n\n"
+            "## Key Metric Changes\n"
+            "For each metric that changed significantly (>10%), explain the change "
+            "and its likely cause. Reference specific sql_id/queryid values.\n\n"
+            "## New or Regressed SQL\n"
+            "Identify SQL that appeared in Snapshot B but not A (new workload), or SQL "
+            "whose elapsed time increased significantly. For each, explain the likely "
+            "cause and provide specific fix SQL (CREATE INDEX, ANALYZE, rewrite).\n\n"
+            "## Wait Event Changes\n"
+            "Highlight wait events that increased or decreased. Explain implications "
+            "(e.g., increased 'enq: TX - row lock contention' suggests locking issues).\n\n"
+            "## Recommendations\n"
+            "Numbered action plan sorted by impact. Each item must include:\n"
+            "- The specific sql_id/queryid/object affected\n"
+            "- The exact SQL command to execute\n- Expected improvement\n\n"
+            "IMPORTANT: Be SPECIFIC. Always reference sql_id, queryid, or table names. "
+            "Never give generic advice. Use markdown code blocks for SQL."
+        )
+        try:
+            return self.llm.generate(prompt=text, system_prompt=instructions)
+        except (ConnectionError, RuntimeError) as exc:
+            return f"LLM comparison analysis failed: {exc}"
+
+
+def _fmt(row: dict[str, Any]) -> str:
+    """Render *row* as comma-separated ``key=value`` pairs, omitting None values."""
+    return ", ".join(f"{key}={val}" for key, val in row.items() if val is not None)

From bcd80b1de106ce0863b896247d90e1e14246880c Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 09:20:48 +0000
Subject: [PATCH 12/19] Fix AttributeError in Compare Snapshots tab:
 list_awr_snapshots returns list not dict

---
 tools/pg-assistant/app.py | 236 ++++++++++++++++++--------------------
 1 file changed, 114 insertions(+), 122 deletions(-)

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index 6621141..12fc190 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -1101,70 +1101,66 @@ def _render_comparison(result: dict) -> None:
             analyser_cmp = PerformanceAnalyser(
                 st.session_state.db_client, st.session_state.llm_client
             )
-            snap_result = analyser_cmp.list_awr_snapshots()
-            if "error" in snap_result:
-                st.error(f"Cannot load snapshots: {snap_result['error']}")
+            snaps = analyser_cmp.list_awr_snapshots()
+            if not snaps:
+                st.info("No AWR snapshots found.")
             else:
-                snaps = snap_result.get("rows", [])
-                if not snaps:
-                    st.info("No AWR snapshots found.")
-                else:
-                    snap_ids = sorted(
-                        {int(s["snap_id"]) for s in snaps if s.get("snap_id")}
+                snap_ids = sorted(
+                    {int(s["snap_id"]) for s in snaps if s.get("snap_id")}
+                )
+                snap_labels = {
+                    int(s["snap_id"]): (
+                        f"{s['snap_id']} - {s.get('end_interval_time', '')}"
+                    )
+                    for s in snaps
+                    if s.get("snap_id")
+                }
+
+                col_a, col_b = st.columns(2)
+                with col_a:
+                    st.markdown("**Snapshot Range A (Baseline)**")
+                    a_begin = st.selectbox(
+                        "A \u2014 Begin Snap",
+                        snap_ids,
+                        index=0,
+                        key="cmp_a_begin",
+                        format_func=lambda x: snap_labels.get(x, str(x)),
+                    )
+                    a_end = st.selectbox(
+                        "A \u2014 End Snap",
+                        snap_ids,
+                        index=min(1, len(snap_ids) - 1),
+                        key="cmp_a_end",
+                        format_func=lambda x: snap_labels.get(x, str(x)),
+                    )
+                with col_b:
+                    st.markdown("**Snapshot Range B (Current)**")
+                    b_begin = st.selectbox(
+                        "B \u2014 Begin Snap",
+                        snap_ids,
+                        index=max(0, len(snap_ids) - 2),
+                        key="cmp_b_begin",
+                        format_func=lambda x: snap_labels.get(x, str(x)),
+                    )
+                    b_end = st.selectbox(
+                        "B \u2014 End Snap",
+                        snap_ids,
+                        index=len(snap_ids) - 1,
+                        key="cmp_b_end",
+                        format_func=lambda x: snap_labels.get(x, str(x)),
                     )
-                    snap_labels = {
-                        int(s["snap_id"]): (
-                            f"{s['snap_id']} - {s.get('end_interval_time', '')}"
-                        )
-                        for s in snaps
-                        if s.get("snap_id")
-                    }
-
-                    col_a, col_b = st.columns(2)
-                    with col_a:
-                        st.markdown("**Snapshot Range A (Baseline)**")
-                        a_begin = st.selectbox(
-                            "A \u2014 Begin Snap",
-                            snap_ids,
-                            index=0,
-                            key="cmp_a_begin",
-                            format_func=lambda x: snap_labels.get(x, str(x)),
-                        )
-                        a_end = st.selectbox(
-                            "A \u2014 End Snap",
-                            snap_ids,
-                            index=min(1, len(snap_ids) - 1),
-                            key="cmp_a_end",
-                            format_func=lambda x: snap_labels.get(x, str(x)),
-                        )
-                    with col_b:
-                        st.markdown("**Snapshot Range B (Current)**")
-                        b_begin = st.selectbox(
-                            "B \u2014 Begin Snap",
-                            snap_ids,
-                            index=max(0, len(snap_ids) - 2),
-                            key="cmp_b_begin",
-                            format_func=lambda x: snap_labels.get(x, str(x)),
-                        )
-                        b_end = st.selectbox(
-                            "B \u2014 End Snap",
-                            snap_ids,
-                            index=len(snap_ids) - 1,
-                            key="cmp_b_end",
-                            format_func=lambda x: snap_labels.get(x, str(x)),
-                        )
 
-                    if st.button("\U0001f50d Compare Snapshots", key="cmp_ora_btn"):
-                        if a_begin >= a_end:
-                            st.error("Range A: Begin snap must be less than End snap.")
-                        elif b_begin >= b_end:
-                            st.error("Range B: Begin snap must be less than End snap.")
-                        else:
-                            with st.spinner("Comparing snapshots\u2026"):
-                                result = comparator.compare_oracle(
-                                    a_begin, a_end, b_begin, b_end
-                                )
-                            _render_comparison(result)
+                if st.button("\U0001f50d Compare Snapshots", key="cmp_ora_btn"):
+                    if a_begin >= a_end:
+                        st.error("Range A: Begin snap must be less than End snap.")
+                    elif b_begin >= b_end:
+                        st.error("Range B: Begin snap must be less than End snap.")
+                    else:
+                        with st.spinner("Comparing snapshots\u2026"):
+                            result = comparator.compare_oracle(
+                                a_begin, a_end, b_begin, b_end
+                            )
+                        _render_comparison(result)
 
         elif db_type == DB_TYPE_POSTGRESQL:
             cmp_mode = st.radio(
@@ -1178,70 +1174,66 @@ def _render_comparison(result: dict) -> None:
                 analyser_cmp = PerformanceAnalyser(
                     st.session_state.db_client, st.session_state.llm_client
                 )
-                samp_result = analyser_cmp.list_pgprofile_samples()
-                if "error" in samp_result:
-                    st.error(f"Cannot load pgProfile samples: {samp_result['error']}")
+                samps = analyser_cmp.list_pgprofile_samples()
+                if not samps:
+                    st.info("No pgProfile samples found.")
                 else:
-                    samps = samp_result.get("rows", [])
-                    if not samps:
-                        st.info("No pgProfile samples found.")
-                    else:
-                        samp_ids = sorted(
-                            {int(s["sample_id"]) for s in samps if s.get("sample_id")}
+                    samp_ids = sorted(
+                        {int(s["sample_id"]) for s in samps if s.get("sample_id")}
+                    )
+                    samp_labels = {
+                        int(s["sample_id"]): (
+                            f"{s['sample_id']} - {s.get('sample_time', '')}"
                         )
-                        samp_labels = {
-                            int(s["sample_id"]): (
-                                f"{s['sample_id']} - {s.get('sample_time', '')}"
-                            )
-                            for s in samps
-                            if s.get("sample_id")
-                        }
-
-                        col_a, col_b = st.columns(2)
-                        with col_a:
-                            st.markdown("**Sample Range A (Baseline)**")
-                            sa_begin = st.selectbox(
-                                "A \u2014 Begin Sample",
-                                samp_ids,
-                                index=0,
-                                key="cmp_sa_begin",
-                                format_func=lambda x: samp_labels.get(x, str(x)),
-                            )
-                            sa_end = st.selectbox(
-                                "A \u2014 End Sample",
-                                samp_ids,
-                                index=min(1, len(samp_ids) - 1),
-                                key="cmp_sa_end",
-                                format_func=lambda x: samp_labels.get(x, str(x)),
-                            )
-                        with col_b:
-                            st.markdown("**Sample Range B (Current)**")
-                            sb_begin = st.selectbox(
-                                "B \u2014 Begin Sample",
-                                samp_ids,
-                                index=max(0, len(samp_ids) - 2),
-                                key="cmp_sb_begin",
-                                format_func=lambda x: samp_labels.get(x, str(x)),
-                            )
-                            sb_end = st.selectbox(
-                                "B \u2014 End Sample",
-                                samp_ids,
-                                index=len(samp_ids) - 1,
-                                key="cmp_sb_end",
-                                format_func=lambda x: samp_labels.get(x, str(x)),
-                            )
+                        for s in samps
+                        if s.get("sample_id")
+                    }
 
-                        if st.button("\U0001f50d Compare Samples", key="cmp_pg_btn"):
-                            if sa_begin >= sa_end:
-                                st.error("Range A: Begin must be less than End.")
-                            elif sb_begin >= sb_end:
-                                st.error("Range B: Begin must be less than End.")
-                            else:
-                                with st.spinner("Comparing samples\u2026"):
-                                    result = comparator.compare_pgprofile(
-                                        sa_begin, sa_end, sb_begin, sb_end
-                                    )
-                                _render_comparison(result)
+                    col_a, col_b = st.columns(2)
+                    with col_a:
+                        st.markdown("**Sample Range A (Baseline)**")
+                        sa_begin = st.selectbox(
+                            "A \u2014 Begin Sample",
+                            samp_ids,
+                            index=0,
+                            key="cmp_sa_begin",
+                            format_func=lambda x: samp_labels.get(x, str(x)),
+                        )
+                        sa_end = st.selectbox(
+                            "A \u2014 End Sample",
+                            samp_ids,
+                            index=min(1, len(samp_ids) - 1),
+                            key="cmp_sa_end",
+                            format_func=lambda x: samp_labels.get(x, str(x)),
+                        )
+                    with col_b:
+                        st.markdown("**Sample Range B (Current)**")
+                        sb_begin = st.selectbox(
+                            "B \u2014 Begin Sample",
+                            samp_ids,
+                            index=max(0, len(samp_ids) - 2),
+                            key="cmp_sb_begin",
+                            format_func=lambda x: samp_labels.get(x, str(x)),
+                        )
+                        sb_end = st.selectbox(
+                            "B \u2014 End Sample",
+                            samp_ids,
+                            index=len(samp_ids) - 1,
+                            key="cmp_sb_end",
+                            format_func=lambda x: samp_labels.get(x, str(x)),
+                        )
+
+                    if st.button("\U0001f50d Compare Samples", key="cmp_pg_btn"):
+                        if sa_begin >= sa_end:
+                            st.error("Range A: Begin must be less than End.")
+                        elif sb_begin >= sb_end:
+                            st.error("Range B: Begin must be less than End.")
+                        else:
+                            with st.spinner("Comparing samples\u2026"):
+                                result = comparator.compare_pgprofile(
+                                    sa_begin, sa_end, sb_begin, sb_end
+                                )
+                            _render_comparison(result)
 
             else:
                 st.info(

From af3381824c7c7e557bd5dd1f94ed6707b403a495 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 09:49:18 +0000
Subject: [PATCH 13/19] Detect PostgreSQL version and use version-aware
 bgwriter/checkpoint queries (PG 17+ compat)

---
 tools/pg-assistant/auto_analyse.py | 55 +++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index 7dd305e..45066de 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -380,7 +380,9 @@
     WHERE datname = current_database()
 """
 
-_PG_BGWRITER = """
+# PostgreSQL < 17: checkpoint columns live in pg_stat_bgwriter.
+# PostgreSQL >= 17: they moved to pg_stat_checkpointer with renamed columns.
+_PG_BGWRITER_LEGACY = """
     SELECT
         checkpoints_timed, checkpoints_req,
         buffers_checkpoint, buffers_clean, buffers_backend,
@@ -388,6 +390,18 @@
     FROM pg_stat_bgwriter
 """
 
+_PG_BGWRITER_V17 = """
+    SELECT
+        cp.num_timed AS checkpoints_timed,
+        cp.num_requested AS checkpoints_req,
+        cp.buffers_written AS buffers_checkpoint,
+        bg.buffers_clean, io.buffers_backend, bg.maxwritten_clean
+    FROM pg_stat_checkpointer cp CROSS JOIN pg_stat_bgwriter bg
+    CROSS JOIN (SELECT COALESCE(SUM(writes), 0)::bigint AS buffers_backend
+                FROM pg_stat_io WHERE object = 'relation'
+                AND backend_type NOT IN ('checkpointer', 'background writer')) io
+"""
+
 _PG_UNUSED_INDEXES = """
     SELECT
         schemaname, relname, indexrelname,
@@ -753,7 +767,7 @@
     ORDER BY count DESC
 """
 
-_PG_CHECKPOINT_STATS = """
+_PG_CHECKPOINT_STATS_LEGACY = """
     SELECT
         checkpoints_timed,
         checkpoints_req,
@@ -767,6 +781,21 @@
     FROM pg_stat_bgwriter
 """
 
+_PG_CHECKPOINT_STATS_V17 = """
+    SELECT
+        cp.num_timed AS checkpoints_timed,
+        cp.num_requested AS checkpoints_req,
+        cp.buffers_written AS buffers_checkpoint,
+        bg.buffers_clean, io.buffers_backend, bg.maxwritten_clean,
+        ROUND(io.buffers_backend::numeric /
+              GREATEST(cp.buffers_written + bg.buffers_clean + io.buffers_backend, 1)
+              * 100, 2) AS backend_write_pct
+    FROM pg_stat_checkpointer cp CROSS JOIN pg_stat_bgwriter bg
+    CROSS JOIN (SELECT COALESCE(SUM(writes), 0)::bigint AS buffers_backend
+                FROM pg_stat_io WHERE object = 'relation'
+                AND backend_type NOT IN ('checkpointer', 'background writer')) io
+"""
+
 ANALYSIS_SYSTEM_PROMPT = (
     "You are a senior DBA and database performance engineer performing a deep-dive "
     "analysis. You have been given detailed performance data including SQL IDs/query IDs, "
@@ -1049,8 +1078,26 @@ def _collect_pgprofile(self, begin_sample: int, end_sample: int) -> dict[str, An
 
     # -- PostgreSQL collection -----------------------------------------------
 
+    def _get_pg_major_version(self) -> int:
+        """Return the PostgreSQL major version (e.g. 16, 17), or 0 if unknown."""
+        result = self.db_client.execute_query(
+            "SELECT current_setting('server_version_num')::int AS ver"
+        )
+        rows = [] if "error" in result else result.get("rows") or []
+        try:
+            # server_version_num is e.g. 170001 for 17.1, 160004 for 16.4
+            return int(rows[0]["ver"]) // 10000
+        except (IndexError, KeyError, TypeError, ValueError):
+            # No usable row/value: report 0 instead of aborting the collection.
+            return 0
+
     def _collect_postgresql(self) -> dict[str, Any]:
         sections: dict[str, Any] = {}
+        pg_major = self._get_pg_major_version()
+        bgwriter_sql = _PG_BGWRITER_V17 if pg_major >= 17 else _PG_BGWRITER_LEGACY
+        checkpoint_sql = (
+            _PG_CHECKPOINT_STATS_V17 if pg_major >= 17 else _PG_CHECKPOINT_STATS_LEGACY
+        )
         queries = {
             "top_cpu_queries": _PG_TOP_CPU_QUERIES,
             "top_queries": _PG_TOP_QUERIES,
@@ -1061,14 +1108,14 @@ def _collect_postgresql(self) -> dict[str, Any]:
             "stale_stats_vacuum": _PG_STALE_STATS,
             "table_stats": _PG_TABLE_STATS,
             "database_stats": _PG_DB_STATS,
-            "bgwriter_stats": _PG_BGWRITER,
+            "bgwriter_stats": bgwriter_sql,
             "unused_indexes": _PG_UNUSED_INDEXES,
             "lock_waits": _PG_LOCK_WAITS,
             "bloat_estimate": _PG_BLOAT_ESTIMATE,
             "sequence_cache_issues": _PG_SEQUENCE_CACHE,
             "temp_file_usage": _PG_TEMP_FILE_USAGE,
             "connection_stats": _PG_CONNECTION_STATS,
-            "checkpoint_stats": _PG_CHECKPOINT_STATS,
+            "checkpoint_stats": checkpoint_sql,
         }
         for name, sql in queries.items():
             result = self.db_client.execute_query(sql)

From 740214931d75f220395244efcfc00ff010fd4d83 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 10:36:44 +0000
Subject: [PATCH 14/19] Fix LLM hallucination: simplify system prompts, remove
 example placeholders, add data-grounding instructions

---
 tools/pg-assistant/auto_analyse.py     | 82 +++++++-------------------
 tools/pg-assistant/snapshot_compare.py | 24 +++-----
 2 files changed, 28 insertions(+), 78 deletions(-)

diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index 45066de..f15857f 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -797,74 +797,29 @@
 """
 
 ANALYSIS_SYSTEM_PROMPT = (
-    "You are a senior DBA and database performance engineer performing a deep-dive "
-    "analysis. You have been given detailed performance data including SQL IDs/query IDs, "
-    "execution plans, full table scans, existing indexes, stats freshness, row contention "
-    "events, sequence caching issues, and other best-practice metrics.\n\n"
-    "Produce the following sections:\n\n"
+    "You are a senior DBA analysing REAL performance data from a live database.\n\n"
+    "CRITICAL RULES — violating any of these makes your analysis useless:\n"
+    "- ONLY reference sql_ids, queryids, table names, and SQL text that appear "
+    "in the data below. NEVER invent fake IDs or placeholder names.\n"
+    "- If a section has '(no data)', say 'No issues found' and move on.\n"
+    "- For every problematic SQL, copy the ACTUAL query text from the data into "
+    "a ```sql code block.\n"
+    "- Provide EXACT fix commands (CREATE INDEX, ANALYZE, ALTER SEQUENCE, etc.) "
+    "with real table/column names from the data.\n"
+    "- Skip any section where the data shows no problems.\n"
+    "- Never output generic advice or example/template text.\n\n"
+    "Produce these sections (skip sections with no relevant data):\n"
     "## Executive Summary\n"
-    "2-3 sentences summarising the overall database health and biggest concern.\n\n"
     "## High Elapsed Time SQL\n"
-    "For EACH SQL with high average elapsed time per execution (reference sql_id/queryid):\n"
-    "- Quote the sql_id / queryid, avg elapsed, total elapsed, and a snippet\n"
-    "- Explain WHY it is slow (full table scan, missing index, bad join, bad stats)\n"
-    "- Provide the EXACT fix SQL (CREATE INDEX, ANALYZE, rewrite, etc.)\n\n"
     "## High Execution Count SQL\n"
-    "For SQL executed thousands of times:\n"
-    "- Even small per-execution cost adds up; flag these with sql_id/queryid\n"
-    "- Suggest caching, batching, or query consolidation where applicable\n"
-    "- Provide exact fix SQL if index or rewrite would help\n\n"
     "## Full Table Scans\n"
-    "List every table being full-scanned with the sql_id causing it.\n"
-    "- For each, check the existing indexes section — if an index already exists "
-    "that should have been used, suggest gathering fresh stats or checking predicates.\n"
-    "- If no suitable index exists, provide the exact CREATE INDEX statement.\n\n"
-    "## Row Contention & Locking Issues\n"
-    "Analyse the row contention / enqueue wait events data:\n"
-    "- Flag 'enq: TX - row lock contention' and similar events with wait times\n"
-    "- Identify the likely cause (hot blocks, ITL contention, poor sequence caching)\n"
-    "- Provide fixes: increase INITRANS, reduce transaction scope, batch commits\n\n"
+    "## Row Contention & Locking\n"
     "## Sequence Caching Issues\n"
-    "For sequences with NOCACHE or CACHE 1:\n"
-    "- Explain the performance impact (row cache lock waits, redo contention)\n"
-    "- Provide exact ALTER SEQUENCE ... CACHE 20 (or higher) statements\n"
-    "- Flag ORDER sequences that may need NOORDER for better performance\n\n"
     "## Missing / Recommended Indexes\n"
-    "Based on the query patterns (WHERE, JOIN, ORDER BY columns visible in SQL text), "
-    "suggest specific CREATE INDEX statements. Reference the sql_id/queryid that "
-    "would benefit.\n\n"
-    "## Stale Statistics / Vacuum / Bloat Issues\n"
-    "List tables with stale or missing stats. Provide exact ANALYZE / DBMS_STATS "
-    "commands. For PostgreSQL, flag tables with high dead-tuple ratios needing VACUUM "
-    "and estimate bloat. For Oracle, flag tables not analysed in 7+ days.\n\n"
-    "## Temp Space / Sort Issues\n"
-    "Flag queries spilling to temp (temp_blks_read/written for PG, sorts (disk) for "
-    "Oracle). Suggest work_mem increase, index to avoid sort, or query rewrite.\n\n"
+    "## Stale Statistics / Vacuum / Bloat\n"
     "## Unused Indexes\n"
-    "List indexes that have never been scanned and recommend dropping them "
-    "(provide DROP INDEX statements).\n\n"
-    "## Checkpoint / Redo / WAL Issues\n"
-    "For Oracle: flag excessive redo log switches (>6/hour). "
-    "For PostgreSQL: flag high backend_write_pct (buffers_backend vs checkpoint). "
-    "Suggest redo log sizing or checkpoint_completion_target tuning.\n\n"
+    "## Checkpoint / WAL Issues\n"
     "## Action Plan (Priority Order)\n"
-    "Numbered list of actions sorted by impact. Each action must include:\n"
-    "- The specific sql_id / queryid / table / sequence affected\n"
-    "- The exact SQL command to execute\n"
-    "- Expected improvement\n\n"
-    "IMPORTANT RULES:\n"
-    "1. Be SPECIFIC — always reference sql_id, queryid, or table name.\n"
-    "2. Always QUOTE the full SQL text provided in the data alongside the sql_id/queryid. "
-    "Show the complete query text so the reader can understand exactly which SQL is problematic.\n"
-    "3. Never give generic advice like 'add indexes where needed'.\n"
-    "4. Exclude all system/internal queries — focus only on user application SQL.\n"
-    "5. Use markdown formatting with code blocks for SQL.\n"
-    "6. For each problematic SQL, show it in a code block like:\n"
-    "   ```sql\n"
-    "   -- sql_id: ABC123\n"
-    "   SELECT ... (full query text from the data)\n"
-    "   ```\n"
-    "7. Then explain the issue and provide the fix SQL in another code block."
 )
 
 
@@ -1131,7 +1086,12 @@ def _collect_postgresql(self) -> dict[str, Any]:
     def _format_report(self, data: dict[str, Any]) -> str:
         """Format collected data into a human-readable report for the LLM."""
         db_type = data.get("db_type", "unknown")
-        parts = [f"DATABASE PERFORMANCE REPORT ({db_type.upper()})\n{'=' * 60}\n"]
+        parts = [
+            f"REAL DATABASE PERFORMANCE DATA ({db_type.upper()})\n{'=' * 60}\n",
+            "Below is REAL data collected from a live database. "
+            "Analyse ONLY this data. Do NOT invent sql_ids, table names, or queries "
+            "that do not appear below.\n",
+        ]
 
         for section_name, section_data in data.items():
             if section_name in ("db_type", "snap_range", "sample_range"):
diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py
index 518f7fc..0016333 100644
--- a/tools/pg-assistant/snapshot_compare.py
+++ b/tools/pg-assistant/snapshot_compare.py
@@ -856,27 +856,17 @@ def _format_comparison_text(
 
     def _get_llm_comparison(self, text: str) -> str:
         system_prompt = (
-            "You are a senior DBA comparing two database performance snapshots. "
-            "Produce a detailed comparison report with these sections:\n\n"
+            "You are a senior DBA comparing two REAL database snapshots.\n\n"
+            "CRITICAL: ONLY reference sql_ids, queryids, table names, and SQL text "
+            "that appear in the data below. NEVER invent fake IDs or placeholders.\n\n"
+            "Produce these sections (skip sections with no relevant data):\n"
             "## Executive Summary\n"
-            "2-3 sentences on overall change in database health between the two periods.\n\n"
             "## Key Metric Changes\n"
-            "For each metric that changed significantly (>10%), explain the change "
-            "and its likely cause. Reference specific sql_id/queryid values.\n\n"
             "## New or Regressed SQL\n"
-            "Identify SQL that appeared in Snapshot B but not A (new workload), or SQL "
-            "whose elapsed time increased significantly. For each, explain the likely "
-            "cause and provide specific fix SQL (CREATE INDEX, ANALYZE, rewrite).\n\n"
             "## Wait Event Changes\n"
-            "Highlight wait events that increased or decreased. Explain implications "
-            "(e.g., increased 'enq: TX - row lock contention' suggests locking issues).\n\n"
-            "## Recommendations\n"
-            "Numbered action plan sorted by impact. Each item must include:\n"
-            "- The specific sql_id/queryid/object affected\n"
-            "- The exact SQL command to execute\n"
-            "- Expected improvement\n\n"
-            "IMPORTANT: Be SPECIFIC. Always reference sql_id, queryid, or table names. "
-            "Never give generic advice. Use markdown code blocks for SQL."
+            "## Recommendations\n\n"
+            "For each problematic SQL, copy the ACTUAL query text from the data "
+            "into a ```sql code block. Provide exact fix commands."
         )
         try:
             return self.llm.generate(prompt=text, system_prompt=system_prompt)

From fe6674fecb0e5dbcdad3c9f944fe71ee84849ca7 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 10:48:30 +0000
Subject: [PATCH 15/19] Fix LLM hallucination v2: move instructions AFTER data
 in prompt instead of system prompt (codellama is a completion model, not
 instruction-following)

---
 tools/pg-assistant/auto_analyse.py     | 58 ++++++++++++++++----------
 tools/pg-assistant/snapshot_compare.py | 23 ++++++----
 2 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index f15857f..e192163 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -796,30 +796,46 @@
     CROSS JOIN pg_stat_bgwriter bg
 """
 
-ANALYSIS_SYSTEM_PROMPT = (
-    "You are a senior DBA analysing REAL performance data from a live database.\n\n"
-    "CRITICAL RULES — violating any of these makes your analysis useless:\n"
-    "- ONLY reference sql_ids, queryids, table names, and SQL text that appear "
-    "in the data below. NEVER invent fake IDs or placeholder names.\n"
-    "- If a section has '(no data)', say 'No issues found' and move on.\n"
-    "- For every problematic SQL, copy the ACTUAL query text from the data into "
-    "a ```sql code block.\n"
-    "- Provide EXACT fix commands (CREATE INDEX, ANALYZE, ALTER SEQUENCE, etc.) "
-    "with real table/column names from the data.\n"
-    "- Skip any section where the data shows no problems.\n"
-    "- Never output generic advice or example/template text.\n\n"
-    "Produce these sections (skip sections with no relevant data):\n"
+# Instruction block appended AFTER the data in the prompt.
+# codellama is a completion model — it works best when instructions follow
+# the data so it "completes" the report rather than fabricating from the
+# system prompt.
+_ANALYSIS_INSTRUCTION = (
+    # Explicit "+" needed: adjacent literals are joined before "*" is applied.
+    "\n\n" + "=" * 60 + "\n"
+    "TASK: Analyse the REAL data above. Write a report that ONLY references "
+    "the sql_ids, queryids, table names, and SQL text shown above. "
+    "Do NOT invent any IDs, table names, or queries.\n\n"
+    "For each section below, if the data above has no relevant rows, "
+    "write 'No issues found.' and move on.\n\n"
     "## Executive Summary\n"
+    "2-3 sentences about the biggest issues found in the data above.\n\n"
     "## High Elapsed Time SQL\n"
+    "List each sql_id/queryid from the HIGH ELAPSED PER EXEC section above. "
+    "Copy its query_text. Explain why it is slow and give a CREATE INDEX or fix.\n\n"
     "## High Execution Count SQL\n"
+    "List each sql_id/queryid from the HIGH EXECUTION COUNT section above. "
+    "Copy its query_text. Suggest caching or indexing.\n\n"
     "## Full Table Scans\n"
+    "List tables from the SEQ SCAN TABLES section above with high seq_scan counts. "
+    "Suggest CREATE INDEX statements using real column names.\n\n"
     "## Row Contention & Locking\n"
+    "List events from ROW CONTENTION or LOCK WAITS sections above. Suggest fixes.\n\n"
     "## Sequence Caching Issues\n"
+    "List sequences from SEQUENCE CACHE ISSUES section above. "
+    "Give ALTER SEQUENCE ... CACHE 20 statements.\n\n"
     "## Missing / Recommended Indexes\n"
+    "Based on query WHERE/JOIN columns visible in the SQL text above, "
+    "suggest specific CREATE INDEX statements.\n\n"
     "## Stale Statistics / Vacuum / Bloat\n"
+    "List tables from STALE STATS or BLOAT ESTIMATE sections above. "
+    "Give ANALYZE or VACUUM commands.\n\n"
     "## Unused Indexes\n"
+    "List indexes from UNUSED INDEXES section above. Give DROP INDEX statements.\n\n"
     "## Checkpoint / WAL Issues\n"
-    "## Action Plan (Priority Order)\n"
+    "Review CHECKPOINT STATS and BGWRITER STATS sections above. Flag any issues.\n\n"
+    "## Action Plan\n"
+    "Numbered list of fixes sorted by impact, using ONLY data from above.\n"
 )
 
 
@@ -900,11 +916,11 @@ def check_pg_stat_statements(self) -> bool:
 
     def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
         report_text = self._format_report(raw_data)
+        # Append instructions AFTER the data so codellama "completes" a
+        # real analysis rather than hallucinating from a system prompt.
+        full_prompt = report_text + _ANALYSIS_INSTRUCTION
         try:
-            llm_response = self.llm_client.generate(
-                prompt=report_text,
-                system_prompt=ANALYSIS_SYSTEM_PROMPT,
-            )
+            llm_response = self.llm_client.generate(prompt=full_prompt)
         except (ConnectionError, RuntimeError) as exc:
             llm_response = f"LLM analysis failed: {exc}"
         return {
@@ -914,11 +930,9 @@ def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
         }
 
     def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]:
+        full_prompt = report_text + _ANALYSIS_INSTRUCTION
         try:
-            llm_response = self.llm_client.generate(
-                prompt=report_text,
-                system_prompt=ANALYSIS_SYSTEM_PROMPT,
-            )
+            llm_response = self.llm_client.generate(prompt=full_prompt)
         except (ConnectionError, RuntimeError) as exc:
             llm_response = f"LLM analysis failed: {exc}"
         return {
diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py
index 0016333..3f4cc52 100644
--- a/tools/pg-assistant/snapshot_compare.py
+++ b/tools/pg-assistant/snapshot_compare.py
@@ -855,21 +855,26 @@ def _format_comparison_text(
         return "\n".join(parts)
 
     def _get_llm_comparison(self, text: str) -> str:
-        system_prompt = (
-            "You are a senior DBA comparing two REAL database snapshots.\n\n"
-            "CRITICAL: ONLY reference sql_ids, queryids, table names, and SQL text "
-            "that appear in the data below. NEVER invent fake IDs or placeholders.\n\n"
-            "Produce these sections (skip sections with no relevant data):\n"
+        # Append instructions AFTER the data so codellama "completes" a
+        # real analysis rather than hallucinating from a system prompt.
+        instruction = (
+            "\n\n" + "=" * 60 + "\n"
+            "TASK: Compare the two snapshots above. Write a report that ONLY "
+            "references sql_ids, queryids, table names, and SQL text shown above. "
+            "Do NOT invent any IDs, table names, or queries.\n\n"
             "## Executive Summary\n"
+            "What changed between Snapshot A and Snapshot B?\n\n"
             "## Key Metric Changes\n"
+            "List metrics from the DELTA SUMMARY above that changed >10%.\n\n"
             "## New or Regressed SQL\n"
+            "SQL that appeared or got worse in Snapshot B. Copy query_text.\n\n"
             "## Wait Event Changes\n"
-            "## Recommendations\n\n"
-            "For each problematic SQL, copy the ACTUAL query text from the data "
-            "into a ```sql code block. Provide exact fix commands."
+            "Wait events that increased or decreased between snapshots.\n\n"
+            "## Recommendations\n"
+            "Numbered action plan using ONLY data from above.\n"
         )
         try:
-            return self.llm.generate(prompt=text, system_prompt=system_prompt)
+            return self.llm.generate(prompt=text + instruction)
         except (ConnectionError, RuntimeError) as exc:
             return f"LLM comparison analysis failed: {exc}"
 

From 390ce71387f16c972a244409096e8428cd4e92f5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 11:00:35 +0000
Subject: [PATCH 16/19] Replace LLM-based analysis with programmatic Python
 analysis engine

- Python code now identifies all issues (high elapsed SQL, full table scans,
  sequence caching, stale stats, unused indexes, etc.) with real sql_ids,
  table names, and query text
- LLM only provides a brief supplementary summary of pre-identified findings
- Same hybrid approach applied to snapshot comparison

---
 tools/pg-assistant/auto_analyse.py     | 521 ++++++++++++++++++++++---
 tools/pg-assistant/snapshot_compare.py | 168 ++++++--
 2 files changed, 618 insertions(+), 71 deletions(-)

diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index e192163..db583f2 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -796,47 +796,449 @@
     CROSS JOIN pg_stat_bgwriter bg
 """
 
-# Instruction block appended AFTER the data in the prompt.
-# codellama is a completion model — it works best when instructions follow
-# the data so it "completes" the report rather than fabricating from the
-# system prompt.
-_ANALYSIS_INSTRUCTION = (
-    "\n\n"
-    "=" * 60 + "\n"
-    "TASK: Analyse the REAL data above. Write a report that ONLY references "
-    "the sql_ids, queryids, table names, and SQL text shown above. "
-    "Do NOT invent any IDs, table names, or queries.\n\n"
-    "For each section below, if the data above has no relevant rows, "
-    "write 'No issues found.' and move on.\n\n"
-    "## Executive Summary\n"
-    "2-3 sentences about the biggest issues found in the data above.\n\n"
-    "## High Elapsed Time SQL\n"
-    "List each sql_id/queryid from the HIGH ELAPSED PER EXEC section above. "
-    "Copy its query_text. Explain why it is slow and give a CREATE INDEX or fix.\n\n"
-    "## High Execution Count SQL\n"
-    "List each sql_id/queryid from the HIGH EXECUTION COUNT section above. "
-    "Copy its query_text. Suggest caching or indexing.\n\n"
-    "## Full Table Scans\n"
-    "List tables from the SEQ SCAN TABLES section above with high seq_scan counts. "
-    "Suggest CREATE INDEX statements using real column names.\n\n"
-    "## Row Contention & Locking\n"
-    "List events from ROW CONTENTION or LOCK WAITS sections above. Suggest fixes.\n\n"
-    "## Sequence Caching Issues\n"
-    "List sequences from SEQUENCE CACHE ISSUES section above. "
-    "Give ALTER SEQUENCE ... CACHE 20 statements.\n\n"
-    "## Missing / Recommended Indexes\n"
-    "Based on query WHERE/JOIN columns visible in the SQL text above, "
-    "suggest specific CREATE INDEX statements.\n\n"
-    "## Stale Statistics / Vacuum / Bloat\n"
-    "List tables from STALE STATS or BLOAT ESTIMATE sections above. "
-    "Give ANALYZE or VACUUM commands.\n\n"
-    "## Unused Indexes\n"
-    "List indexes from UNUSED INDEXES section above. Give DROP INDEX statements.\n\n"
-    "## Checkpoint / WAL Issues\n"
-    "Review CHECKPOINT STATS and BGWRITER STATS sections above. Flag any issues.\n\n"
-    "## Action Plan\n"
-    "Numbered list of fixes sorted by impact, using ONLY data from above.\n"
-)
+# ---------------------------------------------------------------------------
+# Programmatic analysis — Python code does the heavy lifting, not the LLM.
+# ---------------------------------------------------------------------------
+
+
+def _safe_float(val: Any, default: float = 0.0) -> float:
+    """Convert *val* to float, returning *default* when conversion fails."""
+    try:
+        return float(val)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: huge ints
+        return default
+
+
+def _safe_int(val: Any, default: int = 0) -> int:
+    """Convert *val* to int, returning *default* when conversion fails."""
+    try:
+        return int(val)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: +/-inf
+        return default
+
+
+def _truncate_sql(sql_text: str, length: int = 200) -> str:
+    """Strip SQL text and cap it at *length* characters for display."""
+    if not sql_text:  # None or "" -> placeholder instead of an empty block
+        return "(no SQL text)"
+    sql_text = str(sql_text).strip()
+    if len(sql_text) > length:
+        return sql_text[:length] + "..."  # marker shows the text was cut
+    return sql_text
+
+
+def _build_findings_report(data: dict[str, Any]) -> str:
+    """Analyse collected data programmatically and build a markdown report.
+
+    Reads the per-section row lists in *data* and reports slow/hot SQL, table
+    scans, contention, sequence caching, stale stats/bloat, unused indexes,
+    etc. No LLM is involved; action items are numbered in section order.
+    """
+    db_type = data.get("db_type", "unknown")
+    is_oracle = db_type == DB_TYPE_ORACLE
+    parts: list[str] = []
+    action_items: list[str] = []
+    action_idx = 0
+
+    parts.append(f"# Performance Analysis Report ({db_type.upper()})")
+    parts.append("")
+
+    # --- High Elapsed Time SQL ------------------------------------------------
+    section_key = "high_elapsed_per_exec"
+    rows = _get_rows(data, section_key)
+    parts.append("## High Elapsed Time SQL")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            sid = row.get("sql_id") or row.get("queryid") or "?"
+            avg_elapsed = _safe_float(row.get("avg_elapsed_sec", 0))
+            total_elapsed = _safe_float(
+                row.get("total_elapsed_sec") or row.get("total_exec_sec", 0)
+            )
+            execs = _safe_int(row.get("executions") or row.get("calls", 0))
+            sql_text = str(row.get("sql_text") or row.get("query_text") or "")
+            gets = _safe_int(row.get("buffer_gets") or row.get("shared_blks_read", 0))
+            parts.append(
+                f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — "
+                f"avg {avg_elapsed:.4f}s/exec, {execs} executions, "
+                f"total {total_elapsed:.2f}s, buffer gets/reads: {gets}"
+            )
+            if sql_text:
+                parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+            action_idx += 1
+            action_items.append(
+                f"{action_idx}. **[HIGH ELAPSED]** Investigate `{sid}` "
+                f"(avg {avg_elapsed:.4f}s/exec). Consider adding indexes on "
+                f"columns used in WHERE/JOIN clauses."
+            )
+        parts.append("")
+
+    # --- High Execution Count SQL ---------------------------------------------
+    section_key = "high_execution_count"
+    rows = _get_rows(data, section_key)
+    parts.append("## High Execution Count SQL")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            sid = row.get("sql_id") or row.get("queryid") or "?"
+            execs = _safe_int(row.get("executions") or row.get("calls", 0))
+            total_elapsed = _safe_float(
+                row.get("total_elapsed_sec") or row.get("total_exec_sec", 0)
+            )
+            sql_text = str(row.get("sql_text") or row.get("query_text") or "")
+            parts.append(
+                f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — "
+                f"{execs:,} executions, total {total_elapsed:.2f}s"
+            )
+            if sql_text:
+                parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+            if execs > 100000:
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[HIGH EXEC COUNT]** `{sid}` executed "
+                    f"{execs:,} times. Consider caching results or batching."
+                )
+        parts.append("")
+
+    # --- Full Table Scans -----------------------------------------------------
+    fts_key = "full_table_scans" if is_oracle else "seq_scan_tables"
+    rows = _get_rows(data, fts_key)
+    parts.append("## Full Table Scans")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            if is_oracle:
+                table = row.get("table_name", "?")
+                owner = row.get("object_owner", "")
+                sid = row.get("sql_id", "?")
+                execs = _safe_int(row.get("executions", 0))
+                sql_text = str(row.get("sql_text") or "")
+                parts.append(
+                    f"**Table: `{owner}.{table}`** — sql_id: `{sid}`, "
+                    f"{execs} executions"
+                )
+                if sql_text:
+                    parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[FULL TABLE SCAN]** `{owner}.{table}` "
+                    f"via sql_id `{sid}`. Review query and add appropriate index."
+                )
+            else:
+                table = row.get("relname", "?")
+                schema = row.get("schemaname", "public")
+                seq_scans = _safe_int(row.get("seq_scan", 0))
+                seq_reads = _safe_int(row.get("seq_tup_read", 0))
+                idx_scans = _safe_int(row.get("idx_scan", 0))
+                live_tup = _safe_int(row.get("n_live_tup", 0))
+                size_mb = _safe_float(row.get("table_size_mb", 0))
+                parts.append(
+                    f"**Table: `{schema}.{table}`** — "
+                    f"{seq_scans:,} seq scans, {seq_reads:,} rows read, "
+                    f"{idx_scans:,} idx scans, {live_tup:,} live rows, "
+                    f"{size_mb:.1f} MB"
+                )
+                if seq_scans > 100 and live_tup > 10000:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[SEQ SCAN]** `{schema}.{table}` has "
+                        f"{seq_scans:,} seq scans on {live_tup:,} rows. "
+                        f"Add indexes on frequently filtered columns."
+                    )
+        parts.append("")
+
+    # --- Row Contention & Locking ---------------------------------------------
+    contention_key = "row_contention" if is_oracle else "lock_waits"
+    rows = _get_rows(data, contention_key)
+    parts.append("## Row Contention & Locking")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            if is_oracle:
+                event = row.get("event", "?")
+                waits = _safe_int(row.get("total_waits", 0))
+                waited_sec = _safe_float(row.get("time_waited_sec", 0))
+                parts.append(
+                    f"**Event: `{event}`** — {waits:,} waits, "
+                    f"{waited_sec:.2f}s total wait time"
+                )
+                if waited_sec > 1:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[CONTENTION]** `{event}` — "
+                        f"{waited_sec:.2f}s total. Reduce hot-row updates, "
+                        f"increase INITRANS, or tune locking strategy."
+                    )
+            else:
+                pid = row.get("pid", "?")
+                user = row.get("usename", "?")
+                event = row.get("wait_event", "?")
+                event_type = row.get("wait_event_type", "")
+                running_sec = _safe_float(row.get("running_sec", 0))
+                query = str(row.get("query") or "")
+                parts.append(
+                    f"**PID {pid}** (user: {user}) — wait: {event_type}/{event}, "
+                    f"running {running_sec:.2f}s"
+                )
+                if query:
+                    parts.append(f"```sql\n{_truncate_sql(query, 200)}\n```")
+        parts.append("")
+
+    # --- Sequence Caching Issues -----------------------------------------------
+    seq_key = "sequence_no_cache" if is_oracle else "sequence_cache_issues"
+    rows = _get_rows(data, seq_key)
+    parts.append("## Sequence Caching Issues")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            if is_oracle:
+                owner = row.get("sequence_owner", "")
+                name = row.get("sequence_name", "?")
+                cache = _safe_int(row.get("cache_size", 0))
+                parts.append(
+                    f"**`{owner}.{name}`** — cache_size={cache} (should be >= 20)"
+                )
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[SEQUENCE]** "
+                    f"`ALTER SEQUENCE {owner}.{name} CACHE 20;`"
+                )
+            else:
+                schema = row.get("schemaname", "public")
+                name = row.get("sequencename", "?")
+                cache = _safe_int(row.get("cache_size") or 0)
+                parts.append(
+                    f"**`{schema}.{name}`** — cache_size={cache} (should be >= 20)"
+                )
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[SEQUENCE]** "
+                    f"`ALTER SEQUENCE {schema}.{name} CACHE 20;`"
+                )
+        parts.append("")
+
+    # --- Stale Statistics / Vacuum / Bloat ------------------------------------
+    if is_oracle:
+        rows = _get_rows(data, "stale_statistics")
+    else:
+        rows = _get_rows(data, "stale_stats_vacuum") + _get_rows(data, "bloat_estimate")
+        # Deduplicate by schema.table, keeping the first occurrence
+        seen_tables: set[str] = set()
+        deduped: list[dict[str, Any]] = []
+        for r in rows:
+            key = f"{r.get('schemaname', '')}.{r.get('relname', '')}"
+            if key not in seen_tables:
+                seen_tables.add(key)
+                deduped.append(r)
+        rows = deduped
+
+    parts.append("## Stale Statistics / Vacuum / Bloat")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            if is_oracle:
+                table = row.get("table_name", "?")
+                num_rows = _safe_int(row.get("num_rows", 0))
+                stale = row.get("stale_stats", "?")
+                last_analyzed = row.get("last_analyzed", "never")
+                days = _safe_float(row.get("days_since_analyzed", 0))
+                parts.append(
+                    f"**`{table}`** — {num_rows:,} rows, stale={stale}, "
+                    f"last analyzed: {last_analyzed} ({days:.0f} days ago)"
+                )
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[STALE STATS]** "
+                    f"`EXEC DBMS_STATS.GATHER_TABLE_STATS"
+                    f"(ownname=>USER, tabname=>'{table}');`"
+                )
+            else:
+                schema = row.get("schemaname", "public")
+                table = row.get("relname", "?")
+                dead = _safe_int(row.get("n_dead_tup", 0))
+                live = _safe_int(row.get("n_live_tup", 0))
+                dead_pct = _safe_float(row.get("dead_pct", 0))
+                last_vac = (
+                    row.get("last_autovacuum") or row.get("last_vacuum") or "never"
+                )
+                last_analyze = (
+                    row.get("last_autoanalyze") or row.get("last_analyze") or "never"
+                )
+                parts.append(
+                    f"**`{schema}.{table}`** — {live:,} live, {dead:,} dead "
+                    f"({dead_pct:.1f}% bloat), last vacuum: {last_vac}, "
+                    f"last analyze: {last_analyze}"
+                )
+                if dead_pct > 20 or dead > 50000:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[BLOAT]** `VACUUM ANALYZE {schema}.{table};` "
+                        f"— {dead_pct:.1f}% dead tuples"
+                    )
+                elif str(last_analyze) == "never" or str(last_analyze) == "None":
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[STALE STATS]** "
+                        f"`ANALYZE {schema}.{table};` — never analyzed"
+                    )
+        parts.append("")
+
+    # --- Unused Indexes -------------------------------------------------------
+    rows = _get_rows(data, "unused_indexes")
+    parts.append("## Unused Indexes")
+    if not rows:
+        parts.append("No issues found.\n")
+    else:
+        parts.append("")
+        for row in rows:
+            schema = row.get("schemaname", "public")
+            table = row.get("relname", "?")
+            idx_name = row.get("indexrelname", "?")
+            size_mb = _safe_float(row.get("index_size_mb", 0))
+            parts.append(
+                f"**`{schema}.{idx_name}`** on `{table}` — {size_mb:.1f} MB, 0 scans"
+            )
+            if size_mb > 1:
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[UNUSED INDEX]** "
+                    f"`DROP INDEX {schema}.{idx_name};` — "
+                    f"{size_mb:.1f} MB wasted"
+                )
+        parts.append("")
+
+    # --- Checkpoint / WAL Issues (PostgreSQL) ---------------------------------
+    if not is_oracle:
+        cp_rows = _get_rows(data, "checkpoint_stats")
+        parts.append("## Checkpoint / WAL Issues")
+        has_issue = False
+        if cp_rows:
+            row = cp_rows[0]
+            backend_pct = _safe_float(row.get("backend_write_pct", 0))
+            req = _safe_int(row.get("checkpoints_req", 0))
+            timed = _safe_int(row.get("checkpoints_timed", 0))
+            parts.append(
+                f"Checkpoints: {timed} timed, {req} requested. "
+                f"Backend write %: {backend_pct:.1f}%"
+            )
+            if backend_pct > 10:
+                has_issue = True
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[CHECKPOINT]** Backend writes are "
+                    f"{backend_pct:.1f}% of total — increase "
+                    f"`shared_buffers` and `checkpoint_completion_target`."
+                )
+            if req > timed and timed > 0:
+                has_issue = True
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[CHECKPOINT]** More requested ({req}) than "
+                    f"timed ({timed}) checkpoints — increase `max_wal_size`."
+                )
+        if not has_issue:
+            parts.append("No issues found.")
+        parts.append("")
+
+    # --- Wait Events (Oracle) -------------------------------------------------
+    if is_oracle:
+        rows = _get_rows(data, "wait_events")
+        parts.append("## Top Wait Events")
+        if not rows:
+            parts.append("No issues found.\n")
+        else:
+            parts.append("")
+            for row in rows[:10]:
+                event = row.get("event", "?")
+                waits = _safe_int(row.get("total_waits", 0))
+                waited = _safe_float(row.get("time_waited_sec", 0))
+                parts.append(f"- **`{event}`** — {waits:,} waits, {waited:.2f}s")
+            parts.append("")
+
+    # --- Temp File Usage (PostgreSQL) -----------------------------------------
+    if not is_oracle:
+        rows = _get_rows(data, "temp_file_usage")
+        if rows:
+            parts.append("## Temp File Usage")
+            parts.append("")
+            for row in rows[:5]:
+                sid = row.get("queryid", "?")
+                temp_mb = _safe_float(row.get("temp_mb", 0))
+                sql_text = str(row.get("query_text") or "")
+                parts.append(f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp usage")
+                if sql_text:
+                    parts.append(f"```sql\n{_truncate_sql(sql_text, 200)}\n```")
+                if temp_mb > 100:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[TEMP FILES]** queryid `{sid}` uses "
+                        f"{temp_mb:.1f} MB temp. Increase `work_mem` or optimize "
+                        f"sort/join."
+                    )
+            parts.append("")
+
+    # --- Executive Summary & Action Plan --------------------------------------
+    summary_parts: list[str] = []
+    high_elapsed = _get_rows(data, "high_elapsed_per_exec")
+    high_exec = _get_rows(data, "high_execution_count")
+    fts = _get_rows(data, "full_table_scans" if is_oracle else "seq_scan_tables")
+    contention = _get_rows(data, "row_contention" if is_oracle else "lock_waits")
+    seqs = _get_rows(
+        data, "sequence_no_cache" if is_oracle else "sequence_cache_issues"
+    )
+
+    if high_elapsed:
+        summary_parts.append(
+            f"{len(high_elapsed)} queries with high elapsed time per execution"
+        )
+    if high_exec:
+        summary_parts.append(
+            f"{len(high_exec)} queries with very high execution counts"
+        )
+    if fts:
+        summary_parts.append(
+            f"{len(fts)} {'full table scans' if is_oracle else 'tables with heavy seq scans'}"
+        )
+    if contention:
+        summary_parts.append(f"{len(contention)} contention/lock wait events")
+    if seqs:
+        summary_parts.append(f"{len(seqs)} sequences with no/low caching")
+
+    exec_summary = (
+        "Found: " + "; ".join(summary_parts) + "."
+        if summary_parts
+        else "No significant performance issues detected."
+    )
+
+    # Build final report: summary at top, then sections, then action plan
+    header = [f"## Executive Summary\n{exec_summary}\n"]
+    footer = ["\n## Action Plan (Priority Order)\n"]
+    if action_items:
+        footer.extend(action_items)
+    else:
+        footer.append("No action items — database appears healthy.")
+
+    return "\n".join(header + parts + footer)
+
+
+def _get_rows(data: dict[str, Any], key: str) -> list[dict[str, Any]]:
+    """Return ``data[key]`` when it is a list, otherwise an empty list."""
+    val = data.get(key, [])
+    if isinstance(val, list):
+        return val
+    return []
 
 
 # ---------------------------------------------------------------------------
@@ -915,24 +1317,45 @@ def check_pg_stat_statements(self) -> bool:
     # -- internal helpers ----------------------------------------------------
 
     def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
+        # Programmatic analysis — Python code identifies all issues.
+        findings_report = _build_findings_report(raw_data)
         report_text = self._format_report(raw_data)
-        # Append instructions AFTER the data so codellama "completes" a
-        # real analysis rather than hallucinating from a system prompt.
-        full_prompt = report_text + _ANALYSIS_INSTRUCTION
+
+        # Ask the LLM for a brief supplementary summary only.
+        llm_summary = ""
         try:
-            llm_response = self.llm_client.generate(prompt=full_prompt)
+            llm_prompt = (
+                findings_report + "\n\n---\n"
+                "Based on the findings above, write 3-5 sentences summarising "
+                "the most critical issues and what the DBA should do first. "
+                "Do NOT repeat the full report. Do NOT invent new findings."
+            )
+            llm_summary = self.llm_client.generate(prompt=llm_prompt)
         except (ConnectionError, RuntimeError) as exc:
-            llm_response = f"LLM analysis failed: {exc}"
+            llm_summary = f"(LLM summary unavailable: {exc})"
+
+        # Combine: programmatic findings + LLM summary (or its failure notice)
+        analysis = findings_report
+        if llm_summary:
+            analysis += f"\n\n---\n## LLM Summary\n{llm_summary}"
+
         return {
             "raw_data": raw_data,
             "report_text": report_text,
-            "analysis": llm_response,
+            "analysis": analysis,
         }
 
     def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]:
-        full_prompt = report_text + _ANALYSIS_INSTRUCTION
+        # For uploaded reports, we still need the LLM since we don't
+        # have structured data — but we keep the prompt minimal.
+        llm_prompt = (
+            report_text + "\n\n---\n"
+            "Summarise the key performance issues in the report above. "
+            "Only reference data that actually appears above. "
+            "Do NOT invent sql_ids, table names, or metrics."
+        )
         try:
-            llm_response = self.llm_client.generate(prompt=full_prompt)
+            llm_response = self.llm_client.generate(prompt=llm_prompt)
         except (ConnectionError, RuntimeError) as exc:
             llm_response = f"LLM analysis failed: {exc}"
         return {
diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py
index 3f4cc52..22eb9de 100644
--- a/tools/pg-assistant/snapshot_compare.py
+++ b/tools/pg-assistant/snapshot_compare.py
@@ -281,11 +281,19 @@ def _build_comparison(
         # Build delta summary table
         delta_table = self._build_delta_table(data_a, data_b, label_a, label_b)
 
-        # LLM comparison summary
+        # Programmatic comparison — Python code identifies all changes.
+        findings = self._build_programmatic_comparison(
+            data_a, data_b, label_a, label_b, delta_table
+        )
+
+        # Optional LLM summary appended after the real findings.
         comparison_text = self._format_comparison_text(
             data_a, data_b, label_a, label_b, delta_table
         )
-        analysis = self._get_llm_comparison(comparison_text)
+        llm_summary = self._get_llm_comparison(comparison_text)
+        analysis = findings
+        if llm_summary:
+            analysis += f"\n\n---\n## LLM Summary\n{llm_summary}"
 
         return {
             "figures": figures,
@@ -855,28 +863,144 @@ def _format_comparison_text(
         return "\n".join(parts)
 
     def _get_llm_comparison(self, text: str) -> str:
-        # Append instructions AFTER the data so codellama "completes" a
-        # real analysis rather than hallucinating from a system prompt.
-        instruction = (
-            "\n\n" + "=" * 60 + "\n"
-            "TASK: Compare the two snapshots above. Write a report that ONLY "
-            "references sql_ids, queryids, table names, and SQL text shown above. "
-            "Do NOT invent any IDs, table names, or queries.\n\n"
-            "## Executive Summary\n"
-            "What changed between Snapshot A and Snapshot B?\n\n"
-            "## Key Metric Changes\n"
-            "List metrics from the DELTA SUMMARY above that changed >10%.\n\n"
-            "## New or Regressed SQL\n"
-            "SQL that appeared or got worse in Snapshot B. Copy query_text.\n\n"
-            "## Wait Event Changes\n"
-            "Wait events that increased or decreased between snapshots.\n\n"
-            "## Recommendations\n"
-            "Numbered action plan using ONLY data from above.\n"
-        )
+        # The programmatic findings are built by the caller; this method
+        # only asks the LLM for a brief summary of the comparison text.
         try:
-            return self.llm.generate(prompt=text + instruction)
+            llm_prompt = (
+                text + "\n\n---\n"
+                "Based on the snapshot comparison data above, write 3-5 sentences "
+                "summarising what changed and what the DBA should investigate. "
+                "Do NOT invent any sql_ids, table names, or metrics."
+            )
+            return self.llm.generate(prompt=llm_prompt)
         except (ConnectionError, RuntimeError) as exc:
-            return f"LLM comparison analysis failed: {exc}"
+            return f"LLM comparison summary unavailable: {exc}"
+
+    def _build_programmatic_comparison(
+        self,
+        data_a: dict[str, Any],
+        data_b: dict[str, Any],
+        label_a: str,
+        label_b: str,
+        delta_table: list[dict[str, Any]],
+    ) -> str:
+        """Build a programmatic comparison report from real data."""
+        parts: list[str] = []
+        action_items: list[str] = []
+        action_idx = 0
+
+        parts.append("# Snapshot Comparison Analysis")
+        parts.append(f"**{label_a}** vs **{label_b}**\n")
+
+        # --- Key Metric Changes -----------------------------------------------
+        parts.append("## Key Metric Changes")
+        parts.append("")
+        significant = [r for r in delta_table if abs(float(str(r.get("delta", 0)))) > 0]
+        if not significant:
+            parts.append("No significant metric changes.\n")
+        else:
+            for row in significant:
+                metric = row.get("metric", "?")
+                val_a = row.get(label_a, 0)
+                val_b = row.get(label_b, 0)
+                delta = row.get("delta", 0)
+                pct = row.get("change_pct", "0%")
+                parts.append(
+                    f"- **{metric}**: {val_a} → {val_b} (delta: {delta}, {pct})"
+                )
+            parts.append("")
+
+        # --- Regressed / New SQL -----------------------------------------------
+        parts.append("## New or Regressed SQL")
+        sql_a = {
+            str(r.get("sql_id") or r.get("queryid", "")): r
+            for r in data_a.get("top_sql", [])
+        }
+        sql_b = {
+            str(r.get("sql_id") or r.get("queryid", "")): r
+            for r in data_b.get("top_sql", [])
+        }
+        id_key = "sql_id" if self.is_oracle else "queryid"
+        regressed: list[str] = []
+        for sid, row_b in sql_b.items():
+            if not sid:
+                continue
+            elapsed_b = float(row_b.get("elapsed_sec", 0))
+            sql_text = str(row_b.get("sql_text") or row_b.get("query_text") or "")
+            if sid in sql_a:
+                elapsed_a = float(sql_a[sid].get("elapsed_sec", 0))
+                if elapsed_a > 0 and elapsed_b > elapsed_a * 1.2:
+                    pct_change = ((elapsed_b - elapsed_a) / elapsed_a) * 100
+                    parts.append(
+                        f"**{id_key}: `{sid}`** — elapsed "
+                        f"{elapsed_a:.2f}s → {elapsed_b:.2f}s "
+                        f"(+{pct_change:.0f}%)"
+                    )
+                    if sql_text:
+                        parts.append(f"```sql\n{sql_text[:200]}\n```")
+                    regressed.append(sid)
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[REGRESSED]** `{sid}` elapsed time "
+                        f"increased {pct_change:.0f}%. Investigate plan change."
+                    )
+            else:
+                parts.append(
+                    f"**{id_key}: `{sid}`** — NEW in snapshot B, "
+                    f"elapsed {elapsed_b:.2f}s"
+                )
+                if sql_text:
+                    parts.append(f"```sql\n{sql_text[:200]}\n```")
+                regressed.append(sid)
+
+        if not regressed:
+            parts.append("No new or regressed SQL detected.\n")
+        parts.append("")
+
+        # --- Wait Event Changes ------------------------------------------------
+        parts.append("## Wait Event Changes")
+        waits_a = {
+            str(r.get("event", "")): float(r.get("time_waited_sec", 0))
+            for r in data_a.get("wait_events", [])
+        }
+        waits_b = {
+            str(r.get("event", "")): float(r.get("time_waited_sec", 0))
+            for r in data_b.get("wait_events", [])
+        }
+        wait_changes: list[str] = []
+        all_events = set(list(waits_a.keys()) + list(waits_b.keys()))
+        for evt in sorted(all_events):
+            wa = waits_a.get(evt, 0)
+            wb = waits_b.get(evt, 0)
+            if wa == 0 and wb == 0:
+                continue
+            delta = wb - wa
+            if abs(delta) > 1:
+                direction = "↑" if delta > 0 else "↓"
+                parts.append(
+                    f"- **`{evt}`**: {wa:.2f}s → {wb:.2f}s "
+                    f"({direction}{abs(delta):.2f}s)"
+                )
+                wait_changes.append(evt)
+                if delta > 10:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[WAIT EVENT]** `{evt}` increased "
+                        f"by {delta:.2f}s. Investigate root cause."
+                    )
+
+        if not wait_changes:
+            parts.append("No significant wait event changes.\n")
+        parts.append("")
+
+        # --- Action Plan -------------------------------------------------------
+        parts.append("## Recommendations")
+        if action_items:
+            parts.extend(action_items)
+        else:
+            parts.append("No significant regressions detected between snapshots.")
+
+        return "\n".join(parts)
 
 
 def _fmt(row: dict[str, Any]) -> str:

From 4f946583c5164186cf2585f33800e22e79dc4c57 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 11:15:06 +0000
Subject: [PATCH 17/19] Comprehensive programmatic analysis: cover ALL data
 sections, remove LLM summary

- Add top_cpu_queries/top_cpu_sql section (most important - always shows top SQL)
- Add top_queries/top_elapsed_sql section (deduped from CPU section)
- Add database_stats overview (cache hit ratio, connections, temp usage)
- Add connection_stats section (idle connection detection)
- Add Oracle system_stats with cache hit ratio, hard parse ratio, disk sorts
- Add Oracle SGA configuration, tablespace I/O, redo log switches, temp usage
- Add Oracle execution plans display with full scan/hash join detection
- Add Oracle parallel queries section
- Add pgProfile wait events section
- Add table_stats (top tables by activity) section
- Add AWR/pgProfile fallback for top SQL sections
- Remove LLM summary entirely (codellama keeps hallucinating generic advice)
- Update app.py labels: 'Performance Analysis Report' instead of 'AI Analysis'
- All analysis is now 100% programmatic from real DB data
---
 tools/pg-assistant/app.py              |   6 +-
 tools/pg-assistant/auto_analyse.py     | 627 +++++++++++++++++++++----
 tools/pg-assistant/snapshot_compare.py |  10 +-
 3 files changed, 547 insertions(+), 96 deletions(-)

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index 12fc190..de1d146 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -85,10 +85,10 @@ def _render_comparison(result: dict) -> None:
                 st.markdown(f"**{title}**")
                 st.plotly_chart(fig, use_container_width=True)
 
-    # LLM analysis
+    # Comparison analysis
     analysis = result.get("analysis", "")
     if analysis:
-        st.markdown("### AI Comparison Analysis")
+        st.markdown("### Comparison Analysis")
         st.markdown(analysis)
 
 
@@ -864,7 +864,7 @@ def _render_comparison(result: dict) -> None:
             st.divider()
 
             if last.get("analysis"):
-                st.subheader("AI Analysis & Action Plan")
+                st.subheader("Performance Analysis Report")
                 st.markdown(last["analysis"])
 
             raw = last.get("raw_data", {})
diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index db583f2..d3f891f 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -843,12 +843,334 @@ def _build_findings_report(data: dict[str, Any]) -> str:
     parts.append(f"# Performance Analysis Report ({db_type.upper()})")
     parts.append("")
 
+    # =====================================================================
+    # DATABASE-LEVEL OVERVIEW
+    # =====================================================================
+
+    # --- Database Stats (PostgreSQL) ------------------------------------------
+    if not is_oracle:
+        db_rows = _get_rows(data, "database_stats")
+        if db_rows:
+            row = db_rows[0]
+            cache_hit = _safe_float(row.get("cache_hit_pct", 0))
+            commits = _safe_int(row.get("xact_commit", 0))
+            rollbacks = _safe_int(row.get("xact_rollback", 0))
+            backends = _safe_int(row.get("numbackends", 0))
+            blks_read = _safe_int(row.get("blks_read", 0))
+            blks_hit = _safe_int(row.get("blks_hit", 0))
+            temp_bytes = _safe_int(row.get("temp_bytes", 0))
+            temp_files = _safe_int(row.get("temp_files", 0))
+            parts.append("## Database Overview")
+            parts.append(
+                f"- **Cache hit ratio:** {cache_hit:.2f}%\n"
+                f"- **Active backends:** {backends}\n"
+                f"- **Transactions:** {commits:,} commits, {rollbacks:,} rollbacks\n"
+                f"- **Blocks:** {blks_hit:,} hit, {blks_read:,} read from disk\n"
+                f"- **Temp usage:** {temp_files:,} files, "
+                f"{temp_bytes / 1048576:.1f} MB"
+            )
+            if cache_hit < 95 and blks_read > 0:
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[CACHE]** Cache hit ratio is {cache_hit:.2f}% "
+                    f"(target > 99%). Increase `shared_buffers`."
+                )
+            if rollbacks > 0 and commits > 0:
+                rb_pct = rollbacks / (commits + rollbacks) * 100
+                if rb_pct > 5:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[ROLLBACKS]** {rb_pct:.1f}% rollback rate "
+                        f"({rollbacks:,}/{commits + rollbacks:,}). "
+                        f"Investigate application error handling."
+                    )
+            parts.append("")
+
+    # --- Connection Stats (PostgreSQL) ----------------------------------------
+    if not is_oracle:
+        conn_rows = _get_rows(data, "connection_stats")
+        if conn_rows:
+            parts.append("## Connection Stats")
+            for row in conn_rows:
+                state = row.get("state", "unknown") or "null"
+                count = _safe_int(row.get("count", 0))
+                wtype = row.get("wait_event_type", "None")
+                parts.append(f"- **{state}**: {count} connections (wait: {wtype})")
+            idle_count = sum(
+                _safe_int(r.get("count", 0))
+                for r in conn_rows
+                if (r.get("state") or "").startswith("idle")
+            )
+            if idle_count > 50:
+                action_idx += 1
+                action_items.append(
+                    f"{action_idx}. **[CONNECTIONS]** {idle_count} idle connections. "
+                    f"Use connection pooling (PgBouncer)."
+                )
+            parts.append("")
+
+    # --- Oracle System Stats --------------------------------------------------
+    if is_oracle:
+        sys_rows = _get_rows(data, "system_stats") or _get_rows(
+            data, "awr_system_stats"
+        )
+        if sys_rows:
+            parts.append("## System Statistics")
+            stats_map: dict[str, int] = {}
+            for row in sys_rows:
+                name = str(row.get("name", ""))
+                val = _safe_int(row.get("value", 0))
+                stats_map[name] = val
+                parts.append(f"- **{name}:** {val:,}")
+            # Cache hit ratio
+            db_gets = stats_map.get("db block gets", 0)
+            consistent = stats_map.get("consistent gets", 0)
+            phys_reads = stats_map.get("physical reads", 0)
+            logical = db_gets + consistent
+            if logical > 0:
+                hit_pct = (1 - phys_reads / logical) * 100
+                parts.append(f"\n**Buffer cache hit ratio: {hit_pct:.2f}%**")
+                if hit_pct < 95:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[CACHE]** Buffer cache hit ratio is "
+                        f"{hit_pct:.2f}% (target > 99%). "
+                        f"Increase `db_cache_size`."
+                    )
+            hard_parse = stats_map.get("parse count (hard)", 0)
+            total_parse = stats_map.get("parse count (total)", 0)
+            if total_parse > 0:
+                hard_pct = hard_parse / total_parse * 100
+                if hard_pct > 30:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[PARSING]** Hard parse ratio is "
+                        f"{hard_pct:.1f}%. Use bind variables."
+                    )
+            sorts_disk = stats_map.get("sorts (disk)", 0)
+            sorts_mem = stats_map.get("sorts (memory)", 0)
+            if sorts_disk > 0 and sorts_mem > 0:
+                disk_pct = sorts_disk / (sorts_mem + sorts_disk) * 100
+                if disk_pct > 5:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[SORTS]** {disk_pct:.1f}% sorts on disk "
+                        f"({sorts_disk:,}/{sorts_mem + sorts_disk:,}). "
+                        f"Increase `sort_area_size` / `PGA_AGGREGATE_TARGET`."
+                    )
+            parts.append("")
+
+    # --- Oracle SGA Info ------------------------------------------------------
+    if is_oracle:
+        sga_rows = _get_rows(data, "sga_info")
+        if sga_rows:
+            parts.append("## SGA Configuration")
+            for row in sga_rows:
+                name = row.get("name", "?")
+                size_mb = _safe_float(row.get("size_mb", 0))
+                parts.append(f"- **{name}:** {size_mb:.0f} MB")
+            parts.append("")
+
+    # --- Oracle Tablespace I/O ------------------------------------------------
+    if is_oracle:
+        ts_rows = _get_rows(data, "tablespace_io")
+        if ts_rows:
+            parts.append("## Tablespace I/O")
+            for row in ts_rows:
+                ts_name = row.get("tablespace_name", "?")
+                reads = _safe_int(row.get("physical_reads", 0))
+                writes = _safe_int(row.get("physical_writes", 0))
+                read_sec = _safe_float(row.get("read_time_sec", 0))
+                write_sec = _safe_float(row.get("write_time_sec", 0))
+                parts.append(
+                    f"- **`{ts_name}`** — reads: {reads:,} ({read_sec:.2f}s), "
+                    f"writes: {writes:,} ({write_sec:.2f}s)"
+                )
+                if read_sec > 10:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[I/O]** Tablespace `{ts_name}` has "
+                        f"{read_sec:.2f}s read time. Move to faster storage or "
+                        f"redistribute I/O."
+                    )
+            parts.append("")
+
+    # --- Oracle Redo Log Switches ---------------------------------------------
+    if is_oracle:
+        redo_rows = _get_rows(data, "redo_log_switches")
+        if redo_rows:
+            parts.append("## Redo Log Switches (Last 24h)")
+            for row in redo_rows:
+                hour = row.get("switch_hour", "?")
+                switches = _safe_int(row.get("switches", 0))
+                parts.append(f"- **{hour}:** {switches} switches")
+                if switches > 10:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[REDO]** {switches} log switches in hour "
+                        f"{hour}. Increase redo log file size."
+                    )
+            parts.append("")
+
+    # --- Oracle Temp Usage ----------------------------------------------------
+    if is_oracle:
+        temp_rows = _get_rows(data, "temp_usage")
+        if temp_rows:
+            parts.append("## Temp Tablespace Usage")
+            for row in temp_rows:
+                ts_name = row.get("tablespace_name", "?")
+                used_mb = _safe_float(row.get("used_mb", 0))
+                free_mb = _safe_float(row.get("free_mb", 0))
+                pct = _safe_float(row.get("pct_used", 0))
+                parts.append(
+                    f"- **`{ts_name}`** — {used_mb:.0f} MB used, "
+                    f"{free_mb:.0f} MB free ({pct:.1f}% used)"
+                )
+                if pct > 80:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[TEMP]** `{ts_name}` is {pct:.1f}% full. "
+                        f"Add temp datafile or resize."
+                    )
+            parts.append("")
+
+    # =====================================================================
+    # TOP SQL BY RESOURCE CONSUMPTION
+    # =====================================================================
+
+    # --- Top CPU SQL (always show — this is the most important section) -------
+    cpu_key = "top_cpu_sql" if is_oracle else "top_cpu_queries"
+    cpu_rows = _get_rows(data, cpu_key)
+    # Also check AWR top SQL / pgProfile top SQL as alternatives
+    if not cpu_rows:
+        cpu_rows = _get_rows(data, "awr_top_sql")
+    if not cpu_rows:
+        cpu_rows = _get_rows(data, "pgprofile_top_sql")
+    if cpu_rows:
+        parts.append("## Top SQL by CPU / Elapsed Time")
+        parts.append("")
+        for i, row in enumerate(cpu_rows[:15]):
+            sid = row.get("sql_id") or row.get("queryid") or "?"
+            id_label = "sql_id" if is_oracle else "queryid"
+            if is_oracle:
+                cpu_sec = _safe_float(row.get("cpu_sec", 0))
+                elapsed_sec = _safe_float(row.get("elapsed_sec", 0))
+                execs = _safe_int(row.get("executions", 0))
+                gets = _safe_int(row.get("buffer_gets", 0))
+                gets_per = _safe_int(row.get("gets_per_exec", 0))
+                sql_text = str(row.get("sql_text") or "")
+                parts.append(
+                    f"**{i + 1}. {id_label}: `{sid}`** — "
+                    f"CPU: {cpu_sec:.2f}s, elapsed: {elapsed_sec:.2f}s, "
+                    f"{execs:,} executions, buffer gets: {gets:,} "
+                    f"({gets_per:,}/exec)"
+                )
+            else:
+                total_sec = _safe_float(
+                    row.get("total_exec_sec") or row.get("total_exec_time", 0)
+                )
+                mean_sec = _safe_float(
+                    row.get("mean_exec_sec") or row.get("mean_exec_time", 0)
+                )
+                calls = _safe_int(row.get("calls", 0))
+                cache_hit = _safe_float(row.get("cache_hit_pct", 100))
+                blk_read = _safe_int(row.get("shared_blks_read", 0))
+                blk_hit = _safe_int(row.get("shared_blks_hit", 0))
+                parts.append(
+                    f"**{i + 1}. {id_label}: `{sid}`** — "
+                    f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, "
+                    f"{calls:,} calls, cache hit: {cache_hit:.1f}%, "
+                    f"blks read: {blk_read:,}, blks hit: {blk_hit:,}"
+                )
+            sql_text = str(row.get("sql_text") or row.get("query_text") or "")
+            if sql_text:
+                parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
+            # Generate action items for top offenders
+            if i < 5:
+                if is_oracle:
+                    if gets_per > 10000:
+                        action_idx += 1
+                        action_items.append(
+                            f"{action_idx}. **[TOP CPU]** `{sid}` — "
+                            f"{gets_per:,} buffer gets/exec. "
+                            f"Review execution plan: "
+                            f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR"
+                            f"('{sid}'));`"
+                        )
+                else:
+                    if cache_hit < 90 and blk_read > 1000:
+                        action_idx += 1
+                        action_items.append(
+                            f"{action_idx}. **[LOW CACHE HIT]** queryid `{sid}` — "
+                            f"cache hit {cache_hit:.1f}%, {blk_read:,} blocks read. "
+                            f"Add indexes or increase `shared_buffers`."
+                        )
+                    if mean_sec > 1.0:
+                        action_idx += 1
+                        action_items.append(
+                            f"{action_idx}. **[SLOW QUERY]** queryid `{sid}` — "
+                            f"avg {mean_sec:.4f}s/call. Run "
+                            f"`EXPLAIN (ANALYZE, BUFFERS) <query>` to investigate."
+                        )
+        parts.append("")
+
+    # --- Top Queries by Total Elapsed (fallback if different from CPU) --------
+    elapsed_key = "top_elapsed_sql" if is_oracle else "top_queries"
+    elapsed_rows = _get_rows(data, elapsed_key)
+    # Only show if we have data AND it's different from cpu_rows
+    if elapsed_rows and elapsed_key != cpu_key:
+        # Keep only rows whose id is not among the first 10 CPU rows
+        cpu_ids = (
+            {str(r.get("sql_id") or r.get("queryid") or "") for r in cpu_rows[:10]}
+            if cpu_rows
+            else set()
+        )
+        new_rows = [
+            r
+            for r in elapsed_rows
+            if str(r.get("sql_id") or r.get("queryid") or "") not in cpu_ids
+        ]
+        if new_rows:
+            parts.append("## Additional Top SQL by Total Elapsed Time")
+            parts.append("*(Not already listed in Top CPU section)*\n")
+            for i, row in enumerate(new_rows[:10]):
+                sid = row.get("sql_id") or row.get("queryid") or "?"
+                id_label = "sql_id" if is_oracle else "queryid"
+                if is_oracle:
+                    elapsed_sec = _safe_float(row.get("elapsed_sec", 0))
+                    execs = _safe_int(row.get("executions", 0))
+                    gets = _safe_int(row.get("buffer_gets", 0))
+                    sql_text = str(row.get("sql_text") or "")
+                    parts.append(
+                        f"**{i + 1}. {id_label}: `{sid}`** — "
+                        f"elapsed: {elapsed_sec:.2f}s, {execs:,} execs, "
+                        f"buffer gets: {gets:,}"
+                    )
+                else:
+                    total_sec = _safe_float(row.get("total_exec_sec", 0))
+                    mean_sec = _safe_float(row.get("mean_exec_sec", 0))
+                    calls = _safe_int(row.get("calls", 0))
+                    cache_hit = _safe_float(row.get("cache_hit_pct", 100))
+                    parts.append(
+                        f"**{i + 1}. {id_label}: `{sid}`** — "
+                        f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, "
+                        f"{calls:,} calls, cache hit: {cache_hit:.1f}%"
+                    )
+                sql_text = str(row.get("sql_text") or row.get("query_text") or "")
+                if sql_text:
+                    parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
+            parts.append("")
+
+    # =====================================================================
+    # THRESHOLD-BASED FINDINGS
+    # =====================================================================
+
     # --- High Elapsed Time SQL ------------------------------------------------
     section_key = "high_elapsed_per_exec"
     rows = _get_rows(data, section_key)
-    parts.append("## High Elapsed Time SQL")
+    parts.append("## High Elapsed Time per Execution (> 1s avg)")
     if not rows:
-        parts.append("No issues found.\n")
+        parts.append("No queries exceed the 1s/exec threshold.\n")
     else:
         parts.append("")
         for row in rows:
@@ -860,27 +1182,37 @@ def _build_findings_report(data: dict[str, Any]) -> str:
             execs = _safe_int(row.get("executions") or row.get("calls", 0))
             sql_text = str(row.get("sql_text") or row.get("query_text") or "")
             gets = _safe_int(row.get("buffer_gets") or row.get("shared_blks_read", 0))
+            id_label = "sql_id" if is_oracle else "queryid"
             parts.append(
-                f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — "
-                f"avg {avg_elapsed:.4f}s/exec, {execs} executions, "
-                f"total {total_elapsed:.2f}s, buffer gets/reads: {gets}"
+                f"**{id_label}: `{sid}`** — "
+                f"avg {avg_elapsed:.4f}s/exec, {execs:,} executions, "
+                f"total {total_elapsed:.2f}s, buffer gets/reads: {gets:,}"
             )
             if sql_text:
-                parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+                parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
             action_idx += 1
-            action_items.append(
-                f"{action_idx}. **[HIGH ELAPSED]** Investigate `{sid}` "
-                f"(avg {avg_elapsed:.4f}s/exec). Consider adding indexes on "
-                f"columns used in WHERE/JOIN clauses."
-            )
+            if is_oracle:
+                action_items.append(
+                    f"{action_idx}. **[HIGH ELAPSED]** `{sid}` "
+                    f"(avg {avg_elapsed:.4f}s/exec). Check plan: "
+                    f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR"
+                    f"('{sid}'));` — add indexes on WHERE/JOIN columns."
+                )
+            else:
+                action_items.append(
+                    f"{action_idx}. **[HIGH ELAPSED]** queryid `{sid}` "
+                    f"(avg {avg_elapsed:.4f}s/exec). Run "
+                    f"`EXPLAIN (ANALYZE, BUFFERS)` on this query and "
+                    f"add indexes on filtered columns."
+                )
         parts.append("")
 
     # --- High Execution Count SQL ---------------------------------------------
     section_key = "high_execution_count"
     rows = _get_rows(data, section_key)
-    parts.append("## High Execution Count SQL")
+    parts.append("## High Execution Count SQL (> 1000 calls)")
     if not rows:
-        parts.append("No issues found.\n")
+        parts.append("No queries exceed the 1000 execution threshold.\n")
     else:
         parts.append("")
         for row in rows:
@@ -890,24 +1222,26 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 row.get("total_elapsed_sec") or row.get("total_exec_sec", 0)
             )
             sql_text = str(row.get("sql_text") or row.get("query_text") or "")
+            id_label = "sql_id" if is_oracle else "queryid"
             parts.append(
-                f"**{'sql_id' if is_oracle else 'queryid'}: `{sid}`** — "
+                f"**{id_label}: `{sid}`** — "
                 f"{execs:,} executions, total {total_elapsed:.2f}s"
             )
             if sql_text:
-                parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+                parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
             if execs > 100000:
                 action_idx += 1
                 action_items.append(
                     f"{action_idx}. **[HIGH EXEC COUNT]** `{sid}` executed "
-                    f"{execs:,} times. Consider caching results or batching."
+                    f"{execs:,} times. Consider caching results, batching, "
+                    f"or reducing call frequency."
                 )
         parts.append("")
 
     # --- Full Table Scans -----------------------------------------------------
     fts_key = "full_table_scans" if is_oracle else "seq_scan_tables"
     rows = _get_rows(data, fts_key)
-    parts.append("## Full Table Scans")
+    parts.append("## Full Table Scans / Sequential Scans")
     if not rows:
         parts.append("No issues found.\n")
     else:
@@ -918,29 +1252,35 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 owner = row.get("object_owner", "")
                 sid = row.get("sql_id", "?")
                 execs = _safe_int(row.get("executions", 0))
+                elapsed = _safe_float(row.get("elapsed_sec", 0))
+                gets = _safe_int(row.get("buffer_gets", 0))
                 sql_text = str(row.get("sql_text") or "")
                 parts.append(
                     f"**Table: `{owner}.{table}`** — sql_id: `{sid}`, "
-                    f"{execs} executions"
+                    f"{execs:,} execs, {elapsed:.2f}s elapsed, "
+                    f"{gets:,} buffer gets"
                 )
                 if sql_text:
-                    parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+                    parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
                 action_idx += 1
                 action_items.append(
                     f"{action_idx}. **[FULL TABLE SCAN]** `{owner}.{table}` "
-                    f"via sql_id `{sid}`. Review query and add appropriate index."
+                    f"via sql_id `{sid}`. Add index on columns in WHERE clause "
+                    f"or use hints to force index access."
                 )
             else:
                 table = row.get("relname", "?")
                 schema = row.get("schemaname", "public")
                 seq_scans = _safe_int(row.get("seq_scan", 0))
-                seq_reads = _safe_int(row.get("seq_tup_read", 0))
+                seq_tup_read = _safe_int(row.get("seq_tup_read", 0))
                 idx_scans = _safe_int(row.get("idx_scan", 0))
                 live_tup = _safe_int(row.get("n_live_tup", 0))
                 size_mb = _safe_float(row.get("table_size_mb", 0))
+                avg_rows = _safe_int(row.get("avg_rows_per_seq_scan", 0))
                 parts.append(
                     f"**Table: `{schema}.{table}`** — "
-                    f"{seq_scans:,} seq scans, {seq_reads:,} rows read, "
+                    f"{seq_scans:,} seq scans ({avg_rows:,} rows/scan avg, "
+                    f"{seq_tup_read:,} rows read), "
                     f"{idx_scans:,} idx scans, {live_tup:,} live rows, "
                     f"{size_mb:.1f} MB"
                 )
@@ -948,17 +1288,78 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                     action_idx += 1
                     action_items.append(
                         f"{action_idx}. **[SEQ SCAN]** `{schema}.{table}` has "
-                        f"{seq_scans:,} seq scans on {live_tup:,} rows. "
-                        f"Add indexes on frequently filtered columns."
+                        f"{seq_scans:,} seq scans on {live_tup:,} rows "
+                        f"({size_mb:.1f} MB). Add indexes on frequently "
+                        f"filtered columns: "
+                        f"`CREATE INDEX ON {schema}.{table} (column_name);`"
                     )
         parts.append("")
 
+    # --- Execution Plans (Oracle) ---------------------------------------------
+    if is_oracle:
+        plans = data.get("execution_plans", [])
+        if isinstance(plans, list) and plans:
+            parts.append("## Execution Plans (Top SQL)")
+            parts.append("")
+            for plan in plans[:5]:
+                sid = plan.get("sql_id", "?")
+                steps = plan.get("steps", [])
+                parts.append(f"### Plan for sql_id: `{sid}`")
+                has_full_scan = False
+                has_hash_join = False
+                for step in steps[:20]:
+                    op = str(step.get("operation", ""))
+                    obj = step.get("object_name", "")
+                    cost = step.get("cost", "")
+                    est = step.get("est_rows", "")
+                    line = f"- {op}"
+                    if obj:
+                        line += f" on `{obj}`"
+                    if cost:
+                        line += f" (cost={cost}, rows={est})"
+                    parts.append(line)
+                    if "FULL" in op.upper():
+                        has_full_scan = True
+                    if "HASH JOIN" in op.upper():
+                        has_hash_join = True
+                if has_full_scan:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[PLAN]** sql_id `{sid}` has TABLE ACCESS "
+                        f"FULL in plan. Add appropriate index."
+                    )
+                if has_hash_join:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[PLAN]** sql_id `{sid}` uses HASH JOIN. "
+                        f"Ensure join columns are indexed for NESTED LOOPS "
+                        f"if table is small."
+                    )
+                parts.append("")
+
+    # --- Oracle Parallel Queries ----------------------------------------------
+    if is_oracle:
+        px_rows = _get_rows(data, "parallel_queries")
+        if px_rows:
+            parts.append("## Parallel Queries")
+            for row in px_rows:
+                sid = row.get("sql_id", "?")
+                px = _safe_int(row.get("px_servers", 0))
+                elapsed = _safe_float(row.get("elapsed_sec", 0))
+                sql_text = str(row.get("sql_text") or "")
+                parts.append(
+                    f"- **sql_id: `{sid}`** — {px:,} PX servers, {elapsed:.2f}s elapsed"
+                )
+                if sql_text:
+                    parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
+            parts.append("")
+
     # --- Row Contention & Locking ---------------------------------------------
     contention_key = "row_contention" if is_oracle else "lock_waits"
     rows = _get_rows(data, contention_key)
     parts.append("## Row Contention & Locking")
     if not rows:
-        parts.append("No issues found.\n")
+        parts.append("No active contention detected.\n")
     else:
         parts.append("")
         for row in rows:
@@ -966,9 +1367,10 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 event = row.get("event", "?")
                 waits = _safe_int(row.get("total_waits", 0))
                 waited_sec = _safe_float(row.get("time_waited_sec", 0))
+                avg_wait = _safe_float(row.get("avg_wait_sec", 0))
                 parts.append(
                     f"**Event: `{event}`** — {waits:,} waits, "
-                    f"{waited_sec:.2f}s total wait time"
+                    f"{waited_sec:.2f}s total, avg {avg_wait:.4f}s/wait"
                 )
                 if waited_sec > 1:
                     action_idx += 1
@@ -983,15 +1385,60 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 event = row.get("wait_event", "?")
                 event_type = row.get("wait_event_type", "")
                 running_sec = _safe_float(row.get("running_sec", 0))
+                state = row.get("state", "")
                 query = str(row.get("query") or "")
                 parts.append(
-                    f"**PID {pid}** (user: {user}) — wait: {event_type}/{event}, "
-                    f"running {running_sec:.2f}s"
+                    f"**PID {pid}** (user: {user}, state: {state}) — "
+                    f"wait: {event_type}/{event}, running {running_sec:.2f}s"
                 )
                 if query:
-                    parts.append(f"```sql\n{_truncate_sql(query, 200)}\n```")
+                    parts.append(f"```sql\n{_truncate_sql(query, 300)}\n```")
+                if running_sec > 60:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[LONG WAIT]** PID {pid} waiting on "
+                        f"{event_type}/{event} for {running_sec:.0f}s. "
+                        f"Consider `SELECT pg_cancel_backend({pid});`"
+                    )
         parts.append("")
 
+    # --- Wait Events (Oracle / AWR) -------------------------------------------
+    if is_oracle:
+        wait_rows = _get_rows(data, "wait_events") or _get_rows(data, "awr_wait_events")
+        if wait_rows:
+            parts.append("## Top Wait Events")
+            parts.append("")
+            for row in wait_rows[:15]:
+                event = row.get("event", "?")
+                waits = _safe_int(row.get("total_waits", 0))
+                waited = _safe_float(row.get("time_waited_sec", 0))
+                avg_w = _safe_float(row.get("avg_wait_sec", 0))
+                line = f"- **`{event}`** — {waits:,} waits, {waited:.2f}s total"
+                if avg_w > 0:
+                    line += f", avg {avg_w:.4f}s"
+                parts.append(line)
+                if waited > 60:
+                    action_idx += 1
+                    action_items.append(
+                        f"{action_idx}. **[WAIT]** `{event}` — "
+                        f"{waited:.2f}s total wait time. "
+                        f"Investigate root cause (I/O, lock, latch)."
+                    )
+            parts.append("")
+
+    # --- pgProfile Wait Events ------------------------------------------------
+    if not is_oracle:
+        pgp_wait_rows = _get_rows(data, "pgprofile_wait_events")
+        if pgp_wait_rows:
+            parts.append("## Wait Events (pgProfile)")
+            for row in pgp_wait_rows[:15]:
+                etype = row.get("event_type", "?")
+                event = row.get("event", "?")
+                waits = _safe_int(row.get("total_waits", 0))
+                waited = _safe_float(row.get("total_waited_sec", 0))
+                parts.append(f"- **{etype}/{event}** — {waits:,} waits, {waited:.2f}s")
+            parts.append("")
+
     # --- Sequence Caching Issues -----------------------------------------------
     seq_key = "sequence_no_cache" if is_oracle else "sequence_cache_issues"
     rows = _get_rows(data, seq_key)
@@ -1032,7 +1479,6 @@ def _build_findings_report(data: dict[str, Any]) -> str:
         rows = _get_rows(data, "stale_statistics")
     else:
         rows = _get_rows(data, "stale_stats_vacuum") + _get_rows(data, "bloat_estimate")
-        # Deduplicate by table name
         seen_tables: set[str] = set()
         deduped: list[dict[str, Any]] = []
         for r in rows:
@@ -1084,10 +1530,11 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 if dead_pct > 20 or dead > 50000:
                     action_idx += 1
                     action_items.append(
-                        f"{action_idx}. **[BLOAT]** `VACUUM ANALYZE {schema}.{table};` "
+                        f"{action_idx}. **[BLOAT]** "
+                        f"`VACUUM ANALYZE {schema}.{table};` "
                         f"— {dead_pct:.1f}% dead tuples"
                     )
-                elif str(last_analyze) == "never" or str(last_analyze) == "None":
+                elif str(last_analyze) in ("never", "None"):
                     action_idx += 1
                     action_items.append(
                         f"{action_idx}. **[STALE STATS]** "
@@ -1097,10 +1544,8 @@ def _build_findings_report(data: dict[str, Any]) -> str:
 
     # --- Unused Indexes -------------------------------------------------------
     rows = _get_rows(data, "unused_indexes")
-    parts.append("## Unused Indexes")
-    if not rows:
-        parts.append("No issues found.\n")
-    else:
+    if rows:
+        parts.append("## Unused Indexes")
         parts.append("")
         for row in rows:
             schema = row.get("schemaname", "public")
@@ -1119,6 +1564,30 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 )
         parts.append("")
 
+    # --- Table Stats (PostgreSQL) — top tables by activity --------------------
+    if not is_oracle:
+        tbl_rows = _get_rows(data, "table_stats")
+        if tbl_rows:
+            parts.append("## Top Tables by Activity")
+            parts.append("")
+            for row in tbl_rows[:10]:
+                schema = row.get("schemaname", "public")
+                table = row.get("relname", "?")
+                seq_scan = _safe_int(row.get("seq_scan", 0))
+                idx_scan = _safe_int(row.get("idx_scan", 0))
+                inserts = _safe_int(row.get("n_tup_ins", 0))
+                updates = _safe_int(row.get("n_tup_upd", 0))
+                deletes = _safe_int(row.get("n_tup_del", 0))
+                live = _safe_int(row.get("n_live_tup", 0))
+                dead = _safe_int(row.get("n_dead_tup", 0))
+                parts.append(
+                    f"- **`{schema}.{table}`** — seq: {seq_scan:,}, "
+                    f"idx: {idx_scan:,}, ins/upd/del: "
+                    f"{inserts:,}/{updates:,}/{deletes:,}, "
+                    f"live: {live:,}, dead: {dead:,}"
+                )
+            parts.append("")
+
     # --- Checkpoint / WAL Issues (PostgreSQL) ---------------------------------
     if not is_oracle:
         cp_rows = _get_rows(data, "checkpoint_stats")
@@ -1129,9 +1598,14 @@ def _build_findings_report(data: dict[str, Any]) -> str:
             backend_pct = _safe_float(row.get("backend_write_pct", 0))
             req = _safe_int(row.get("checkpoints_req", 0))
             timed = _safe_int(row.get("checkpoints_timed", 0))
+            buf_cp = _safe_int(row.get("buffers_checkpoint", 0))
+            buf_clean = _safe_int(row.get("buffers_clean", 0))
+            buf_backend = _safe_int(row.get("buffers_backend", 0))
             parts.append(
-                f"Checkpoints: {timed} timed, {req} requested. "
-                f"Backend write %: {backend_pct:.1f}%"
+                f"- Checkpoints: {timed:,} timed, {req:,} requested\n"
+                f"- Buffers: checkpoint={buf_cp:,}, clean={buf_clean:,}, "
+                f"backend={buf_backend:,}\n"
+                f"- Backend write %: {backend_pct:.1f}%"
             )
             if backend_pct > 10:
                 has_issue = True
@@ -1145,51 +1619,42 @@ def _build_findings_report(data: dict[str, Any]) -> str:
                 has_issue = True
                 action_idx += 1
                 action_items.append(
-                    f"{action_idx}. **[CHECKPOINT]** More requested ({req}) than "
-                    f"timed ({timed}) checkpoints — increase `max_wal_size`."
+                    f"{action_idx}. **[CHECKPOINT]** More requested ({req:,}) "
+                    f"than timed ({timed:,}) checkpoints — increase "
+                    f"`max_wal_size`."
                 )
         if not has_issue:
             parts.append("No issues found.")
         parts.append("")
 
-    # --- Wait Events (Oracle) -------------------------------------------------
-    if is_oracle:
-        rows = _get_rows(data, "wait_events")
-        parts.append("## Top Wait Events")
-        if not rows:
-            parts.append("No issues found.\n")
-        else:
-            parts.append("")
-            for row in rows[:10]:
-                event = row.get("event", "?")
-                waits = _safe_int(row.get("total_waits", 0))
-                waited = _safe_float(row.get("time_waited_sec", 0))
-                parts.append(f"- **`{event}`** — {waits:,} waits, {waited:.2f}s")
-            parts.append("")
-
     # --- Temp File Usage (PostgreSQL) -----------------------------------------
     if not is_oracle:
         rows = _get_rows(data, "temp_file_usage")
         if rows:
             parts.append("## Temp File Usage")
             parts.append("")
-            for row in rows[:5]:
+            for row in rows[:10]:
                 sid = row.get("queryid", "?")
                 temp_mb = _safe_float(row.get("temp_mb", 0))
+                calls = _safe_int(row.get("calls", 0))
                 sql_text = str(row.get("query_text") or "")
-                parts.append(f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp usage")
+                parts.append(
+                    f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp, {calls:,} calls"
+                )
                 if sql_text:
-                    parts.append(f"```sql\n{_truncate_sql(sql_text, 200)}\n```")
+                    parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
                 if temp_mb > 100:
                     action_idx += 1
                     action_items.append(
                         f"{action_idx}. **[TEMP FILES]** queryid `{sid}` uses "
-                        f"{temp_mb:.1f} MB temp. Increase `work_mem` or optimize "
-                        f"sort/join."
+                        f"{temp_mb:.1f} MB temp. Increase `work_mem` or "
+                        f"optimize sort/join."
                     )
             parts.append("")
 
-    # --- Executive Summary & Action Plan --------------------------------------
+    # =====================================================================
+    # EXECUTIVE SUMMARY & ACTION PLAN
+    # =====================================================================
     summary_parts: list[str] = []
     high_elapsed = _get_rows(data, "high_elapsed_per_exec")
     high_exec = _get_rows(data, "high_execution_count")
@@ -1198,7 +1663,14 @@ def _build_findings_report(data: dict[str, Any]) -> str:
     seqs = _get_rows(
         data, "sequence_no_cache" if is_oracle else "sequence_cache_issues"
     )
+    top_sql = (
+        _get_rows(data, "top_cpu_sql" if is_oracle else "top_cpu_queries")
+        or _get_rows(data, "awr_top_sql")
+        or _get_rows(data, "pgprofile_top_sql")
+    )
 
+    if top_sql:
+        summary_parts.append(f"{len(top_sql)} top SQL statements analysed")
     if high_elapsed:
         summary_parts.append(
             f"{len(high_elapsed)} queries with high elapsed time per execution"
@@ -1209,7 +1681,8 @@ def _build_findings_report(data: dict[str, Any]) -> str:
         )
     if fts:
         summary_parts.append(
-            f"{len(fts)} {'full table scans' if is_oracle else 'tables with heavy seq scans'}"
+            f"{len(fts)} "
+            f"{'full table scans' if is_oracle else 'tables with heavy seq scans'}"
         )
     if contention:
         summary_parts.append(f"{len(contention)} contention/lock wait events")
@@ -1219,7 +1692,7 @@ def _build_findings_report(data: dict[str, Any]) -> str:
     exec_summary = (
         "Found: " + "; ".join(summary_parts) + "."
         if summary_parts
-        else "No significant performance issues detected."
+        else "No significant performance issues detected in the collected data."
     )
 
     # Build final report: summary at top, then sections, then action plan
@@ -1228,7 +1701,10 @@ def _build_findings_report(data: dict[str, Any]) -> str:
     if action_items:
         footer.extend(action_items)
     else:
-        footer.append("No action items — database appears healthy.")
+        footer.append(
+            "No critical action items — database appears healthy based "
+            "on collected data."
+        )
 
     return "\n".join(header + parts + footer)
 
@@ -1317,32 +1793,15 @@ def check_pg_stat_statements(self) -> bool:
     # -- internal helpers ----------------------------------------------------
 
     def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
-        # Programmatic analysis — Python code identifies all issues.
+        # Fully programmatic analysis — Python code identifies all issues.
+        # No LLM involved: codellama hallucinates generic advice.
         findings_report = _build_findings_report(raw_data)
         report_text = self._format_report(raw_data)
 
-        # Ask the LLM for a brief supplementary summary only.
-        llm_summary = ""
-        try:
-            llm_prompt = (
-                findings_report + "\n\n---\n"
-                "Based on the findings above, write 3-5 sentences summarising "
-                "the most critical issues and what the DBA should do first. "
-                "Do NOT repeat the full report. Do NOT invent new findings."
-            )
-            llm_summary = self.llm_client.generate(prompt=llm_prompt)
-        except (ConnectionError, RuntimeError) as exc:
-            llm_summary = f"(LLM summary unavailable: {exc})"
-
-        # Combine: programmatic findings + optional LLM summary
-        analysis = findings_report
-        if llm_summary:
-            analysis += f"\n\n---\n## LLM Summary\n{llm_summary}"
-
         return {
             "raw_data": raw_data,
             "report_text": report_text,
-            "analysis": analysis,
+            "analysis": findings_report,
         }
 
     def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]:
diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py
index 22eb9de..e517384 100644
--- a/tools/pg-assistant/snapshot_compare.py
+++ b/tools/pg-assistant/snapshot_compare.py
@@ -281,19 +281,11 @@ def _build_comparison(
         # Build delta summary table
         delta_table = self._build_delta_table(data_a, data_b, label_a, label_b)
 
-        # Programmatic comparison — Python code identifies all changes.
+        # Fully programmatic comparison — no LLM involved.
         findings = self._build_programmatic_comparison(
             data_a, data_b, label_a, label_b, delta_table
         )
-
-        # Optional LLM summary appended after the real findings.
-        comparison_text = self._format_comparison_text(
-            data_a, data_b, label_a, label_b, delta_table
-        )
-        llm_summary = self._get_llm_comparison(comparison_text)
         analysis = findings
-        if llm_summary:
-            analysis += f"\n\n---\n## LLM Summary\n{llm_summary}"
 
         return {
             "figures": figures,

From f74637a10326addddc55dc86556470311ad1ad30 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 12:12:41 +0000
Subject: [PATCH 18/19] Copilot-quality analysis: severity-grouped bottlenecks,
 risk register, config review, prioritised actions

---
 tools/pg-assistant/app.py              |   16 +-
 tools/pg-assistant/auto_analyse.py     | 2047 ++++++++++++++----------
 tools/pg-assistant/snapshot_compare.py |   54 -
 3 files changed, 1246 insertions(+), 871 deletions(-)

diff --git a/tools/pg-assistant/app.py b/tools/pg-assistant/app.py
index de1d146..d134b0b 100644
--- a/tools/pg-assistant/app.py
+++ b/tools/pg-assistant/app.py
@@ -671,14 +671,14 @@ def _render_comparison(result: dict) -> None:
 
             with acol2:
                 if st.button(
-                    "🧠 Full Analysis (Data + LLM)",
+                    "🧠 Full Analysis",
                     use_container_width=True,
                     type="primary",
                 ):
                     analyser = PerformanceAnalyser(
                         db_client=db_client, llm_client=llm_client
                     )
-                    with st.spinner("Collecting data and running LLM analysis..."):
+                    with st.spinner("Collecting data and running analysis..."):
                         result = analyser.analyse()
                     st.session_state.analyser = analyser
                     st.session_state["_last_analysis"] = result
@@ -820,7 +820,7 @@ def _render_comparison(result: dict) -> None:
                     )
                 else:
                     with st.spinner(
-                        "Collecting pg_stat_statements data and running LLM analysis..."
+                        "Collecting pg_stat_statements data and running analysis..."
                     ):
                         result = analyser.analyse_pg_stat_latest()
                     st.session_state.analyser = analyser
@@ -831,7 +831,7 @@ def _render_comparison(result: dict) -> None:
         elif analyse_mode == "Upload report file":
             st.markdown(
                 "Upload an **AWR report** (HTML/text), **pg_stat_statements CSV**, "
-                "or **pgProfile report** (HTML/text) for LLM-powered analysis."
+                "or **pgProfile report** (HTML/text) for analysis."
             )
             uploaded_file = st.file_uploader(
                 "Choose a report file",
@@ -1002,7 +1002,7 @@ def _render_comparison(result: dict) -> None:
     st.subheader("🔧 SQL Tuning Advisor")
     st.markdown(
         "Paste a SQL statement to get its **execution plan**, table metadata, "
-        "and **LLM-powered tuning recommendations** (index suggestions, "
+        "and **tuning recommendations** (index suggestions, "
         "SQL rewrites, stats maintenance)."
     )
 
@@ -1049,9 +1049,7 @@ def _render_comparison(result: dict) -> None:
 
         if tune_btn and tune_sql.strip():
             advisor = SQLTuningAdvisor(db_client=db_client, llm_client=llm_client)
-            with st.spinner(
-                "Running EXPLAIN, collecting metadata, analysing with LLM..."
-            ):
+            with st.spinner("Running EXPLAIN, collecting metadata, analysing..."):
                 result = advisor.analyse_sql(tune_sql.strip(), run_analyze=run_analyze)
 
             if result.get("error"):
@@ -1063,7 +1061,7 @@ def _render_comparison(result: dict) -> None:
                     st.subheader("Execution Plan")
                     st.code(plan_text, language="text")
 
-                # Show LLM analysis
+                # Show analysis
                 analysis = result.get("analysis", "")
                 if analysis:
                     st.divider()
diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index d3f891f..004b351 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -796,6 +796,119 @@
     CROSS JOIN pg_stat_bgwriter bg
 """
 
+# Sizes use numeric division (/ 1048576.0): bigint division truncates to 0 MB.
+_PG_TABLE_SIZES = """
+    SELECT
+        schemaname,
+        relname,
+        ROUND(pg_relation_size(relid) / 1048576.0, 2) AS table_size_mb,
+        ROUND(pg_total_relation_size(relid) / 1048576.0, 2) AS total_size_mb,
+        ROUND((pg_total_relation_size(relid) - pg_relation_size(relid))
+            / 1048576.0, 2) AS toast_index_size_mb,
+        n_live_tup, n_dead_tup,
+        n_tup_ins, n_tup_upd, n_tup_del
+    FROM pg_stat_user_tables
+    ORDER BY pg_total_relation_size(relid) DESC
+    LIMIT 20
+"""
+# WAL counters/timings. NOTE(review): pg_stat_wal needs PostgreSQL 14+ — confirm.
+_PG_WAL_STATS = """
+    SELECT
+        wal_records,
+        wal_fpi,
+        wal_bytes,
+        wal_buffers_full,
+        wal_write,
+        wal_sync,
+        ROUND(wal_write_time::numeric, 2) AS wal_write_time_ms,
+        ROUND(wal_sync_time::numeric, 2) AS wal_sync_time_ms,
+        stats_reset::text AS stats_reset
+    FROM pg_stat_wal
+"""
+# Up to 20 'idle in transaction' sessions, oldest xact_start first, with durations.
+_PG_IDLE_IN_TRANSACTION = """
+    SELECT
+        pid,
+        usename,
+        datname,
+        state,
+        LEFT(query, 300) AS query,
+        ROUND(EXTRACT(EPOCH FROM (now() - state_change))::numeric, 0)
+            AS idle_duration_sec,
+        ROUND(EXTRACT(EPOCH FROM (now() - xact_start))::numeric, 0)
+            AS xact_duration_sec
+    FROM pg_stat_activity
+    WHERE state = 'idle in transaction'
+    ORDER BY xact_start
+    LIMIT 20
+"""
+# name/setting/unit from pg_settings for a fixed list of tuning-related parameters.
+_PG_CONFIG_PARAMS = """
+    SELECT name, setting, unit
+    FROM pg_settings
+    WHERE name IN (
+        'shared_buffers', 'effective_cache_size', 'work_mem',
+        'maintenance_work_mem', 'max_connections', 'max_wal_size',
+        'min_wal_size', 'checkpoint_timeout', 'checkpoint_completion_target',
+        'random_page_cost', 'effective_io_concurrency',
+        'autovacuum_max_workers', 'autovacuum_vacuum_scale_factor',
+        'autovacuum_analyze_scale_factor', 'statement_timeout',
+        'idle_in_transaction_session_timeout', 'wal_level',
+        'max_worker_processes', 'max_parallel_workers',
+        'max_parallel_workers_per_gather', 'wal_compression',
+        'huge_pages', 'shared_preload_libraries'
+    )
+    ORDER BY name
+"""
+# One row per pg_stat_replication entry: LSN positions (as text) and lag in seconds.
+_PG_REPLICATION_STATUS = """
+    SELECT
+        client_addr::text,
+        state,
+        sent_lsn::text,
+        write_lsn::text,
+        flush_lsn::text,
+        replay_lsn::text,
+        ROUND(EXTRACT(EPOCH FROM write_lag)::numeric, 3) AS write_lag_sec,
+        ROUND(EXTRACT(EPOCH FROM flush_lag)::numeric, 3) AS flush_lag_sec,
+        ROUND(EXTRACT(EPOCH FROM replay_lag)::numeric, 3) AS replay_lag_sec
+    FROM pg_stat_replication
+"""
+# name/value/description from v$parameter for a fixed list of instance parameters.
+_ORA_CONFIG_PARAMS = """
+    SELECT name, value, description
+    FROM v$parameter
+    WHERE name IN (
+        'sga_target', 'sga_max_size', 'pga_aggregate_target',
+        'db_cache_size', 'shared_pool_size', 'log_buffer',
+        'processes', 'sessions', 'open_cursors',
+        'cursor_sharing', 'optimizer_mode', 'db_file_multiblock_read_count',
+        'undo_retention', 'undo_tablespace',
+        'result_cache_max_size', 'parallel_max_servers',
+        'parallel_min_servers', 'job_queue_processes'
+    )
+    ORDER BY name
+"""
+# Up to 20 INACTIVE user sessions with last_call_et > 300, longest-idle first.
+_ORA_IDLE_SESSIONS = """
+    SELECT * FROM (
+        SELECT
+            sid,
+            serial#,
+            username,
+            status,
+            machine,
+            program,
+            ROUND(last_call_et / 60, 1) AS idle_minutes,
+            sql_id AS last_sql_id
+        FROM v$session
+        WHERE status = 'INACTIVE'
+          AND type = 'USER'
+          AND last_call_et > 300
+        ORDER BY last_call_et DESC
+    ) WHERE ROWNUM <= 20
+"""
+
 # ---------------------------------------------------------------------------
 # Programmatic analysis — Python code does the heavy lifting, not the LLM.
 # ---------------------------------------------------------------------------
@@ -827,32 +940,60 @@ def _truncate_sql(sql_text: str, length: int = 200) -> str:
     return sql_text
 
 
+def _fmt_bytes(b: float) -> str:
+    """Render byte count *b* as a GB/MB/KB string (1 decimal) or whole bytes."""
+    # Binary units, largest first; each threshold doubles as the divisor.
+    units = ((1073741824, "GB"), (1048576, "MB"), (1024, "KB"))
+    for threshold, suffix in units:
+        if b >= threshold:
+            return f"{b / threshold:.1f} {suffix}"
+    # Anything under 1024 is shown as whole bytes.
+    return f"{b:.0f} B"
+
+
+def _fmt_secs(s: float) -> str:
+    """Render duration *s* (seconds) in days/hrs/min (1 decimal) or seconds."""
+    # Coarsest unit first; each threshold doubles as the divisor.
+    units = ((86400, "days"), (3600, "hrs"), (60, "min"))
+    for threshold, suffix in units:
+        if s >= threshold:
+            return f"{s / threshold:.1f} {suffix}"
+    # Under a minute: keep two decimals of precision.
+    return f"{s:.2f} sec"
+
+
 def _build_findings_report(data: dict[str, Any]) -> str:
-    """Analyse collected data programmatically and build a markdown report.
+    """Production-grade performance analysis report.
 
-    This function does the actual analysis in Python code — identifying
-    problematic SQL, full table scans, missing indexes, etc. from the
-    real data. No LLM is involved in finding issues.
+    Analyses collected data programmatically — identifies bottlenecks,
+    groups by severity, references specific SQL IDs / table names / metrics.
+    No LLM is involved. Output format inspired by enterprise DBA assessments.
     """
     db_type = data.get("db_type", "unknown")
     is_oracle = db_type == DB_TYPE_ORACLE
-    parts: list[str] = []
-    action_items: list[str] = []
-    action_idx = 0
-
-    parts.append(f"# Performance Analysis Report ({db_type.upper()})")
-    parts.append("")
+    # Accumulate bottlenecks as (severity, title, details_markdown)
+    bottlenecks: list[tuple[int, str, str]] = []
+    # Accumulate prioritised actions as (priority, action_text)
+    actions: list[tuple[int, str]] = []
+    # Accumulate risks as (likelihood, impact, description)
+    risks: list[tuple[str, str, str]] = []
+    act_idx = 0
 
     # =====================================================================
-    # DATABASE-LEVEL OVERVIEW
+    # PHASE 1 — Extract key metrics
     # =====================================================================
 
-    # --- Database Stats (PostgreSQL) ------------------------------------------
+    # --- PostgreSQL database stats ---
+    cache_hit = 100.0
+    commits = rollbacks = 0
+    backends = 0
+    temp_bytes = temp_files = 0
+    blks_read = blks_hit = 0
     if not is_oracle:
         db_rows = _get_rows(data, "database_stats")
         if db_rows:
             row = db_rows[0]
-            cache_hit = _safe_float(row.get("cache_hit_pct", 0))
+            cache_hit = _safe_float(row.get("cache_hit_pct", 100))
             commits = _safe_int(row.get("xact_commit", 0))
             rollbacks = _safe_int(row.get("xact_rollback", 0))
             backends = _safe_int(row.get("numbackends", 0))
@@ -860,853 +1001,1138 @@ def _build_findings_report(data: dict[str, Any]) -> str:
             blks_hit = _safe_int(row.get("blks_hit", 0))
             temp_bytes = _safe_int(row.get("temp_bytes", 0))
             temp_files = _safe_int(row.get("temp_files", 0))
-            parts.append("## Database Overview")
-            parts.append(
-                f"- **Cache hit ratio:** {cache_hit:.2f}%\n"
-                f"- **Active backends:** {backends}\n"
-                f"- **Transactions:** {commits:,} commits, {rollbacks:,} rollbacks\n"
-                f"- **Blocks:** {blks_hit:,} hit, {blks_read:,} read from disk\n"
-                f"- **Temp usage:** {temp_files:,} files, "
-                f"{temp_bytes / 1048576:.1f} MB"
-            )
-            if cache_hit < 95 and blks_read > 0:
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[CACHE]** Cache hit ratio is {cache_hit:.2f}% "
-                    f"(target > 99%). Increase `shared_buffers`."
-                )
-            if rollbacks > 0 and commits > 0:
-                rb_pct = rollbacks / (commits + rollbacks) * 100
-                if rb_pct > 5:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[ROLLBACKS]** {rb_pct:.1f}% rollback rate "
-                        f"({rollbacks:,}/{commits + rollbacks:,}). "
-                        f"Investigate application error handling."
-                    )
-            parts.append("")
 
-    # --- Connection Stats (PostgreSQL) ----------------------------------------
-    if not is_oracle:
-        conn_rows = _get_rows(data, "connection_stats")
-        if conn_rows:
-            parts.append("## Connection Stats")
-            for row in conn_rows:
-                state = row.get("state", "unknown") or "null"
-                count = _safe_int(row.get("count", 0))
-                wtype = row.get("wait_event_type", "None")
-                parts.append(f"- **{state}**: {count} connections (wait: {wtype})")
-            idle_count = sum(
-                _safe_int(r.get("count", 0))
-                for r in conn_rows
-                if (r.get("state") or "").startswith("idle")
-            )
-            if idle_count > 50:
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[CONNECTIONS]** {idle_count} idle connections. "
-                    f"Use connection pooling (PgBouncer)."
-                )
-            parts.append("")
-
-    # --- Oracle System Stats --------------------------------------------------
+    # --- Oracle system stats ---
+    ora_cache_hit = 100.0
+    ora_hard_parse_pct = 0.0
+    ora_disk_sort_pct = 0.0
+    ora_rb_count = 0
+    ora_commit_count = 0
     if is_oracle:
         sys_rows = _get_rows(data, "system_stats") or _get_rows(
             data, "awr_system_stats"
         )
-        if sys_rows:
-            parts.append("## System Statistics")
-            stats_map: dict[str, int] = {}
-            for row in sys_rows:
-                name = str(row.get("name", ""))
-                val = _safe_int(row.get("value", 0))
-                stats_map[name] = val
-                parts.append(f"- **{name}:** {val:,}")
-            # Cache hit ratio
-            db_gets = stats_map.get("db block gets", 0)
-            consistent = stats_map.get("consistent gets", 0)
-            phys_reads = stats_map.get("physical reads", 0)
-            logical = db_gets + consistent
-            if logical > 0:
-                hit_pct = (1 - phys_reads / logical) * 100
-                parts.append(f"\n**Buffer cache hit ratio: {hit_pct:.2f}%**")
-                if hit_pct < 95:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[CACHE]** Buffer cache hit ratio is "
-                        f"{hit_pct:.2f}% (target > 99%). "
-                        f"Increase `db_cache_size`."
-                    )
-            hard_parse = stats_map.get("parse count (hard)", 0)
-            total_parse = stats_map.get("parse count (total)", 0)
-            if total_parse > 0:
-                hard_pct = hard_parse / total_parse * 100
-                if hard_pct > 30:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[PARSING]** Hard parse ratio is "
-                        f"{hard_pct:.1f}%. Use bind variables."
-                    )
-            sorts_disk = stats_map.get("sorts (disk)", 0)
-            sorts_mem = stats_map.get("sorts (memory)", 0)
-            if sorts_disk > 0 and sorts_mem > 0:
-                disk_pct = sorts_disk / (sorts_mem + sorts_disk) * 100
-                if disk_pct > 5:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[SORTS]** {disk_pct:.1f}% sorts on disk "
-                        f"({sorts_disk:,}/{sorts_mem + sorts_disk:,}). "
-                        f"Increase `sort_area_size` / `PGA_AGGREGATE_TARGET`."
-                    )
-            parts.append("")
+        stats_map: dict[str, int] = {}
+        for row in sys_rows:
+            stats_map[str(row.get("name", ""))] = _safe_int(row.get("value", 0))
+        db_gets = stats_map.get("db block gets", 0)
+        consistent = stats_map.get("consistent gets", 0)
+        phys_reads = stats_map.get("physical reads", 0)
+        logical = db_gets + consistent
+        if logical > 0:
+            ora_cache_hit = (1 - phys_reads / logical) * 100
+        hard_parse = stats_map.get("parse count (hard)", 0)
+        total_parse = stats_map.get("parse count (total)", 0)
+        if total_parse > 0:
+            ora_hard_parse_pct = hard_parse / total_parse * 100
+        sorts_disk = stats_map.get("sorts (disk)", 0)
+        sorts_mem = stats_map.get("sorts (memory)", 0)
+        if sorts_mem + sorts_disk > 0:
+            ora_disk_sort_pct = sorts_disk / (sorts_mem + sorts_disk) * 100
+        ora_rb_count = stats_map.get("user rollbacks", 0)
+        ora_commit_count = stats_map.get("user commits", 0)
+
+    # --- WAL stats (PostgreSQL 14+) ---
+    wal_bytes = 0
+    wal_fpi = 0
+    wal_sync_time_ms = 0.0
+    wal_write_time_ms = 0.0
+    if not is_oracle:
+        wal_rows = _get_rows(data, "wal_stats")
+        if wal_rows:
+            w = wal_rows[0]
+            wal_bytes = _safe_int(w.get("wal_bytes", 0))
+            wal_fpi = _safe_int(w.get("wal_fpi", 0))
+            wal_sync_time_ms = _safe_float(w.get("wal_sync_time_ms", 0))
+            wal_write_time_ms = _safe_float(w.get("wal_write_time_ms", 0))
+
+    # --- Connection counts ---
+    idle_in_tx_rows = _get_rows(data, "idle_in_transaction") if not is_oracle else []
+    idle_session_rows = _get_rows(data, "idle_sessions") if is_oracle else []
+    conn_rows = _get_rows(data, "connection_stats") if not is_oracle else []
+    idle_count = sum(
+        _safe_int(r.get("count", 0))
+        for r in conn_rows
+        if (r.get("state") or "").startswith("idle")
+    )
 
-    # --- Oracle SGA Info ------------------------------------------------------
-    if is_oracle:
-        sga_rows = _get_rows(data, "sga_info")
-        if sga_rows:
-            parts.append("## SGA Configuration")
-            for row in sga_rows:
-                name = row.get("name", "?")
-                size_mb = _safe_float(row.get("size_mb", 0))
-                parts.append(f"- **{name}:** {size_mb:.0f} MB")
-            parts.append("")
+    # --- Top SQL ---
+    top_cpu = (
+        _get_rows(data, "top_cpu_sql")
+        if is_oracle
+        else _get_rows(data, "top_cpu_queries")
+    )
+    top_elapsed = (
+        _get_rows(data, "top_elapsed_sql")
+        if is_oracle
+        else _get_rows(data, "top_queries")
+    )
+    # Fallback to AWR / pgProfile
+    if not top_cpu and not top_elapsed:
+        top_elapsed = _get_rows(data, "awr_top_sql") or _get_rows(
+            data, "pgprofile_top_sql"
+        )
+    high_elapsed = _get_rows(data, "high_elapsed_per_exec")
+    high_exec = _get_rows(data, "high_execution_count")
+    fts = (
+        _get_rows(data, "full_table_scans")
+        if is_oracle
+        else _get_rows(data, "seq_scan_tables")
+    )
 
-    # --- Oracle Tablespace I/O ------------------------------------------------
-    if is_oracle:
-        ts_rows = _get_rows(data, "tablespace_io")
-        if ts_rows:
-            parts.append("## Tablespace I/O")
-            for row in ts_rows:
-                ts_name = row.get("tablespace_name", "?")
-                reads = _safe_int(row.get("physical_reads", 0))
-                writes = _safe_int(row.get("physical_writes", 0))
-                read_sec = _safe_float(row.get("read_time_sec", 0))
-                write_sec = _safe_float(row.get("write_time_sec", 0))
-                parts.append(
-                    f"- **`{ts_name}`** — reads: {reads:,} ({read_sec:.2f}s), "
-                    f"writes: {writes:,} ({write_sec:.2f}s)"
-                )
-                if read_sec > 10:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[I/O]** Tablespace `{ts_name}` has "
-                        f"{read_sec:.2f}s read time. Move to faster storage or "
-                        f"redistribute I/O."
-                    )
-            parts.append("")
+    # --- Tables ---
+    table_sizes = _get_rows(data, "table_sizes") if not is_oracle else []
+    bloat_rows = _get_rows(data, "bloat_estimate") if not is_oracle else []
+    unused_idx = _get_rows(data, "unused_indexes")
+    stale_rows = (
+        _get_rows(data, "stale_stats_vacuum")
+        if not is_oracle
+        else _get_rows(data, "stale_statistics")
+    )
 
-    # --- Oracle Redo Log Switches ---------------------------------------------
-    if is_oracle:
-        redo_rows = _get_rows(data, "redo_log_switches")
-        if redo_rows:
-            parts.append("## Redo Log Switches (Last 24h)")
-            for row in redo_rows:
-                hour = row.get("switch_hour", "?")
-                switches = _safe_int(row.get("switches", 0))
-                parts.append(f"- **{hour}:** {switches} switches")
-                if switches > 10:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[REDO]** {switches} log switches in hour "
-                        f"{hour}. Increase redo log file size."
-                    )
-            parts.append("")
+    # --- Contention ---
+    contention = (
+        _get_rows(data, "row_contention")
+        if is_oracle
+        else _get_rows(data, "lock_waits")
+    )
+    wait_rows = (
+        _get_rows(data, "wait_events")
+        or _get_rows(data, "awr_wait_events")
+        or _get_rows(data, "pgprofile_wait_events")
+    )
 
-    # --- Oracle Temp Usage ----------------------------------------------------
-    if is_oracle:
-        temp_rows = _get_rows(data, "temp_usage")
-        if temp_rows:
-            parts.append("## Temp Tablespace Usage")
-            for row in temp_rows:
-                ts_name = row.get("tablespace_name", "?")
-                used_mb = _safe_float(row.get("used_mb", 0))
-                free_mb = _safe_float(row.get("free_mb", 0))
-                pct = _safe_float(row.get("pct_used", 0))
-                parts.append(
-                    f"- **`{ts_name}`** — {used_mb:.0f} MB used, "
-                    f"{free_mb:.0f} MB free ({pct:.1f}% used)"
-                )
-                if pct > 80:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[TEMP]** `{ts_name}` is {pct:.1f}% full. "
-                        f"Add temp datafile or resize."
-                    )
-            parts.append("")
+    # --- Sequences ---
+    seqs = (
+        _get_rows(data, "sequence_no_cache")
+        if is_oracle
+        else _get_rows(data, "sequence_cache_issues")
+    )
+
+    # --- Config ---
+    config_rows = _get_rows(data, "config_params")
+
+    # --- Checkpoint (PG) ---
+    ckpt_rows = _get_rows(data, "checkpoint_stats") if not is_oracle else []
+
+    # --- Replication (PG) ---
+    repl_rows = _get_rows(data, "replication_status") if not is_oracle else []
+
+    # --- Temp file usage (PG) ---
+    temp_sql_rows = _get_rows(data, "temp_file_usage") if not is_oracle else []
 
     # =====================================================================
-    # TOP SQL BY RESOURCE CONSUMPTION
+    # PHASE 2 -- Identify bottlenecks with severity
     # =====================================================================
+    # Severity 1 = critical, 2 = important, 3 = advisory
+
+    # -- Rollback explosion --
+    rb_rate = 0.0
+    if not is_oracle and commits + rollbacks > 0:
+        rb_rate = rollbacks / (commits + rollbacks) * 100
+    elif is_oracle and ora_commit_count + ora_rb_count > 0:
+        rb_rate = ora_rb_count / (ora_commit_count + ora_rb_count) * 100
+    if rb_rate > 10:
+        detail = (
+            f"**{rollbacks:,} rollbacks** vs {commits:,} commits "
+            f"(**{rb_rate:.1f}% rollback rate**)\n\n"
+            if not is_oracle
+            else f"**{ora_rb_count:,} rollbacks** vs {ora_commit_count:,} commits "
+            f"(**{rb_rate:.1f}% rollback rate**)\n\n"
+        )
+        detail += (
+            "This almost always means:\n"
+            "- Business validation aborts\n"
+            "- Exception-based flow control\n"
+            "- Retry loops without guardrails\n\n"
+            "Directly increases WAL, dead tuples, autovacuum load."
+        )
+        bottlenecks.append((1, "Rollback Explosion", detail))
+        act_idx += 1
+        actions.append(
+            (
+                0,
+                f"{act_idx}. **Root-cause rollbacks** -- identify "
+                f"why {rb_rate:.1f}% of transactions are aborted",
+            )
+        )
+        risks.append(("High", "Severe", "Dead tuple accumulation from rollbacks"))
+    elif rb_rate > 5:
+        total_rb = rollbacks if not is_oracle else ora_rb_count
+        bottlenecks.append(
+            (
+                2,
+                "Elevated Rollback Rate",
+                f"Rollback rate is **{rb_rate:.1f}%** "
+                f"({total_rb:,} rollbacks). Investigate application "
+                f"error handling.",
+            )
+        )
+        act_idx += 1
+        actions.append((1, f"{act_idx}. Investigate rollback sources"))
 
-    # --- Top CPU SQL (always show — this is the most important section) -------
-    cpu_key = "top_cpu_sql" if is_oracle else "top_cpu_queries"
-    cpu_rows = _get_rows(data, cpu_key)
-    # Also check AWR top SQL / pgProfile top SQL as alternatives
-    if not cpu_rows:
-        cpu_rows = _get_rows(data, "awr_top_sql")
-    if not cpu_rows:
-        cpu_rows = _get_rows(data, "pgprofile_top_sql")
-    if cpu_rows:
-        parts.append("## Top SQL by CPU / Elapsed Time")
-        parts.append("")
-        for i, row in enumerate(cpu_rows[:15]):
-            sid = row.get("sql_id") or row.get("queryid") or "?"
-            id_label = "sql_id" if is_oracle else "queryid"
-            if is_oracle:
-                cpu_sec = _safe_float(row.get("cpu_sec", 0))
-                elapsed_sec = _safe_float(row.get("elapsed_sec", 0))
-                execs = _safe_int(row.get("executions", 0))
-                gets = _safe_int(row.get("buffer_gets", 0))
-                gets_per = _safe_int(row.get("gets_per_exec", 0))
-                sql_text = str(row.get("sql_text") or "")
-                parts.append(
-                    f"**{i + 1}. {id_label}: `{sid}`** — "
-                    f"CPU: {cpu_sec:.2f}s, elapsed: {elapsed_sec:.2f}s, "
-                    f"{execs:,} executions, buffer gets: {gets:,} "
-                    f"({gets_per:,}/exec)"
-                )
-            else:
-                total_sec = _safe_float(
-                    row.get("total_exec_sec") or row.get("total_exec_time", 0)
-                )
-                mean_sec = _safe_float(
-                    row.get("mean_exec_sec") or row.get("mean_exec_time", 0)
-                )
-                calls = _safe_int(row.get("calls", 0))
-                cache_hit = _safe_float(row.get("cache_hit_pct", 100))
-                blk_read = _safe_int(row.get("shared_blks_read", 0))
-                blk_hit = _safe_int(row.get("shared_blks_hit", 0))
-                parts.append(
-                    f"**{i + 1}. {id_label}: `{sid}`** — "
-                    f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, "
-                    f"{calls:,} calls, cache hit: {cache_hit:.1f}%, "
-                    f"blks read: {blk_read:,}, blks hit: {blk_hit:,}"
-                )
-            sql_text = str(row.get("sql_text") or row.get("query_text") or "")
-            if sql_text:
-                parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
-            # Generate action items for top offenders
-            if i < 5:
-                if is_oracle:
-                    if gets_per > 10000:
-                        action_idx += 1
-                        action_items.append(
-                            f"{action_idx}. **[TOP CPU]** `{sid}` — "
-                            f"{gets_per:,} buffer gets/exec. "
-                            f"Review execution plan: "
-                            f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR"
-                            f"('{sid}'));`"
-                        )
-                else:
-                    if cache_hit < 90 and blk_read > 1000:
-                        action_idx += 1
-                        action_items.append(
-                            f"{action_idx}. **[LOW CACHE HIT]** queryid `{sid}` — "
-                            f"cache hit {cache_hit:.1f}%, {blk_read:,} blocks read. "
-                            f"Add indexes or increase `shared_buffers`."
-                        )
-                    if mean_sec > 1.0:
-                        action_idx += 1
-                        action_items.append(
-                            f"{action_idx}. **[SLOW QUERY]** queryid `{sid}` — "
-                            f"avg {mean_sec:.4f}s/call. Run "
-                            f"`EXPLAIN (ANALYZE, BUFFERS) <query>` to investigate."
-                        )
-        parts.append("")
+    # -- Idle-in-transaction (PG) --
+    if idle_in_tx_rows:
+        total_idle_sec = sum(
+            _safe_float(r.get("xact_duration_sec", 0)) for r in idle_in_tx_rows
+        )
+        longest = max(
+            _safe_float(r.get("xact_duration_sec", 0)) for r in idle_in_tx_rows
+        )
+        detail = (
+            f"**{len(idle_in_tx_rows)} sessions** idle in transaction, "
+            f"cumulative **{_fmt_secs(total_idle_sec)}**, "
+            f"longest **{_fmt_secs(longest)}**\n\n"
+            "| PID | User | Duration | Query |\n"
+            "| --- | --- | --- | --- |\n"
+        )
+        for r in idle_in_tx_rows[:10]:
+            pid = r.get("pid", "?")
+            user = r.get("usename", "?")
+            dur = _fmt_secs(_safe_float(r.get("xact_duration_sec", 0)))
+            q = _truncate_sql(str(r.get("query", "")), 80)
+            detail += f"| {pid} | {user} | {dur} | `{q}` |\n"
+        detail += (
+            "\nImpact: prevents vacuum, creates dead tuples, "
+            "increases lock contention.\n"
+            "This is an **application defect**, not a DB tuning issue."
+        )
+        sev = 1 if total_idle_sec > 3600 else 2
+        bottlenecks.append((sev, "Idle-in-Transaction Sessions", detail))
+        act_idx += 1
+        actions.append(
+            (
+                0,
+                f"{act_idx}. **Fix idle-in-transaction at app layer** -- "
+                f"enforce connection/transaction guards, "
+                f"set `idle_in_transaction_session_timeout`",
+            )
+        )
+        risks.append(("High", "Severe", "Bloat & lock risk from idle-in-tx"))
+
+    # -- Idle Oracle sessions --
+    if idle_session_rows and len(idle_session_rows) > 5:
+        detail = (
+            f"**{len(idle_session_rows)} sessions** idle > 5 minutes\n\n"
+            "| SID | User | Idle (min) | Program |\n"
+            "| --- | --- | --- | --- |\n"
+        )
+        for r in idle_session_rows[:10]:
+            detail += (
+                f"| {r.get('sid', '?')} | {r.get('username', '?')} "
+                f"| {_safe_float(r.get('idle_minutes', 0)):.0f} "
+                f"| {r.get('program', '?')} |\n"
+            )
+        bottlenecks.append((2, "Excessive Idle Sessions", detail))
+        act_idx += 1
+        actions.append(
+            (
+                1,
+                f"{act_idx}. Review idle sessions -- consider "
+                f"connection pooling or session timeout",
+            )
+        )
 
-    # --- Top Queries by Total Elapsed (fallback if different from CPU) --------
-    elapsed_key = "top_elapsed_sql" if is_oracle else "top_queries"
-    elapsed_rows = _get_rows(data, elapsed_key)
-    # Only show if we have data AND it's different from cpu_rows
-    if elapsed_rows and elapsed_key != cpu_key:
-        # Check if these are substantially different from the CPU rows
-        cpu_ids = (
-            {str(r.get("sql_id") or r.get("queryid") or "") for r in cpu_rows[:10]}
-            if cpu_rows
-            else set()
+    # -- Cache hit ratio --
+    eff_cache_hit = cache_hit if not is_oracle else ora_cache_hit
+    if eff_cache_hit < 95:
+        detail = f"Buffer cache hit ratio: **{eff_cache_hit:.2f}%** (target > 99%)\n\n"
+        if not is_oracle:
+            detail += (
+                f"Blocks hit: {blks_hit:,}, blocks read from disk: {blks_read:,}\n\n"
+                f"**Fix:** Increase `shared_buffers` "
+                f"(current value shown in Configuration Review below)."
+            )
+        else:
+            detail += "**Fix:** Increase `db_cache_size`."
+        sev = 1 if eff_cache_hit < 90 else 2
+        bottlenecks.append((sev, "Low Buffer Cache Hit Ratio", detail))
+        act_idx += 1
+        param = "shared_buffers" if not is_oracle else "db_cache_size"
+        actions.append(
+            (
+                0,
+                f"{act_idx}. **Increase `{param}`** -- cache hit is {eff_cache_hit:.2f}%",
+            )
+        )
+        risks.append(("High", "High", "Excessive disk I/O from cache misses"))
+
+    # -- WAL pressure (PG) --
+    if wal_bytes > 0:
+        wal_gb = wal_bytes / 1073741824
+        detail = (
+            f"**{wal_gb:.1f} GB WAL** generated (since stats reset)\n"
+            f"- Full-page images (FPI): {wal_fpi:,}\n"
+            f"- WAL sync time: {wal_sync_time_ms / 1000:.1f} sec\n"
+            f"- WAL write time: {wal_write_time_ms / 1000:.1f} sec\n"
+        )
+        if wal_sync_time_ms > wal_write_time_ms * 5 and wal_sync_time_ms > 1000:
+            detail += (
+                "\nWAL sync time is **much higher** than write time "
+                "-- disk sync latency issue."
+            )
+            bottlenecks.append((1, "WAL & Write Pressure", detail))
+            risks.append(("Medium-High", "Severe", "WAL disk saturation"))
+        elif wal_gb > 10:
+            bottlenecks.append((2, "High WAL Volume", detail))
+        act_idx += 1
+        actions.append(
+            (
+                1,
+                f"{act_idx}. Review WAL generation -- "
+                f"batch commits, consider `wal_compression`",
+            )
         )
-        new_rows = [
-            r
-            for r in elapsed_rows
-            if str(r.get("sql_id") or r.get("queryid") or "") not in cpu_ids
-        ]
-        if new_rows:
-            parts.append("## Additional Top SQL by Total Elapsed Time")
-            parts.append("*(Not already listed in Top CPU section)*\n")
-            for i, row in enumerate(new_rows[:10]):
-                sid = row.get("sql_id") or row.get("queryid") or "?"
-                id_label = "sql_id" if is_oracle else "queryid"
-                if is_oracle:
-                    elapsed_sec = _safe_float(row.get("elapsed_sec", 0))
-                    execs = _safe_int(row.get("executions", 0))
-                    gets = _safe_int(row.get("buffer_gets", 0))
-                    sql_text = str(row.get("sql_text") or "")
-                    parts.append(
-                        f"**{i + 1}. {id_label}: `{sid}`** — "
-                        f"elapsed: {elapsed_sec:.2f}s, {execs:,} execs, "
-                        f"buffer gets: {gets:,}"
-                    )
-                else:
-                    total_sec = _safe_float(row.get("total_exec_sec", 0))
-                    mean_sec = _safe_float(row.get("mean_exec_sec", 0))
-                    calls = _safe_int(row.get("calls", 0))
-                    cache_hit = _safe_float(row.get("cache_hit_pct", 100))
-                    parts.append(
-                        f"**{i + 1}. {id_label}: `{sid}`** — "
-                        f"total: {total_sec:.2f}s, avg: {mean_sec:.4f}s/call, "
-                        f"{calls:,} calls, cache hit: {cache_hit:.1f}%"
-                    )
-                sql_text = str(row.get("sql_text") or row.get("query_text") or "")
-                if sql_text:
-                    parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
-            parts.append("")
 
-    # =====================================================================
-    # THRESHOLD-BASED FINDINGS
-    # =====================================================================
+    # -- Hard parse ratio (Oracle) --
+    if is_oracle and ora_hard_parse_pct > 30:
+        bottlenecks.append(
+            (
+                2,
+                "High Hard Parse Ratio",
+                f"Hard parse ratio: **{ora_hard_parse_pct:.1f}%**\n\n"
+                f"**Fix:** Use bind variables instead of literal values.",
+            )
+        )
+        act_idx += 1
+        actions.append(
+            (
+                1,
+                f"{act_idx}. Use bind variables -- "
+                f"hard parse ratio is {ora_hard_parse_pct:.1f}%",
+            )
+        )
 
-    # --- High Elapsed Time SQL ------------------------------------------------
-    section_key = "high_elapsed_per_exec"
-    rows = _get_rows(data, section_key)
-    parts.append("## High Elapsed Time per Execution (> 1s avg)")
-    if not rows:
-        parts.append("No queries exceed the 1s/exec threshold.\n")
-    else:
-        parts.append("")
-        for row in rows:
-            sid = row.get("sql_id") or row.get("queryid") or "?"
-            avg_elapsed = _safe_float(row.get("avg_elapsed_sec", 0))
-            total_elapsed = _safe_float(
-                row.get("total_elapsed_sec") or row.get("total_exec_sec", 0)
+    # -- Disk sorts (Oracle) --
+    if is_oracle and ora_disk_sort_pct > 5:
+        bottlenecks.append(
+            (
+                2,
+                "Disk Sorts",
+                f"**{ora_disk_sort_pct:.1f}%** of sorts go to disk.\n\n"
+                f"**Fix:** Increase `PGA_AGGREGATE_TARGET` or `SORT_AREA_SIZE`.",
             )
-            execs = _safe_int(row.get("executions") or row.get("calls", 0))
-            sql_text = str(row.get("sql_text") or row.get("query_text") or "")
-            gets = _safe_int(row.get("buffer_gets") or row.get("shared_blks_read", 0))
-            id_label = "sql_id" if is_oracle else "queryid"
-            parts.append(
-                f"**{id_label}: `{sid}`** — "
-                f"avg {avg_elapsed:.4f}s/exec, {execs:,} executions, "
-                f"total {total_elapsed:.2f}s, buffer gets/reads: {gets:,}"
+        )
+        act_idx += 1
+        actions.append(
+            (1, f"{act_idx}. Increase PGA -- {ora_disk_sort_pct:.1f}% disk sorts")
+        )
+
+    # -- Top SQL bottlenecks --
+    top_sql_all = top_cpu or top_elapsed
+    if top_sql_all:
+        total_elapsed = sum(
+            _safe_float(
+                r.get("elapsed_sec", 0)
+                or r.get("total_exec_sec", 0)
+                or r.get("cpu_sec", 0)
             )
-            if sql_text:
-                parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
-            action_idx += 1
+            for r in top_sql_all
+        )
+        top1 = top_sql_all[0]
+        if is_oracle:
+            t1_id = top1.get("sql_id", "?")
+            t1_elapsed = _safe_float(
+                top1.get("elapsed_sec", 0) or top1.get("cpu_sec", 0)
+            )
+            t1_execs = _safe_int(top1.get("executions", 0))
+            t1_gets = _safe_int(top1.get("buffer_gets", 0))
+            t1_sql = str(top1.get("sql_text", ""))
+        else:
+            t1_id = str(top1.get("queryid", "?"))
+            t1_elapsed = _safe_float(top1.get("total_exec_sec", 0))
+            t1_execs = _safe_int(top1.get("calls", 0))
+            t1_gets = _safe_int(top1.get("shared_blks_hit", 0)) + _safe_int(
+                top1.get("shared_blks_read", 0)
+            )
+            t1_sql = str(top1.get("query_text", ""))
+
+        detail = (
+            f"Top query alone: **{_fmt_secs(t1_elapsed)}** elapsed, "
+            f"**{t1_execs:,}** executions, **{t1_gets:,}** buffer gets\n\n"
+        )
+        id_col = "sql_id" if is_oracle else "queryid"
+        detail += (
+            f"| # | {id_col} | Elapsed | Executions | Buffer Gets | Query |\n"
+            f"| --- | --- | --- | --- | --- | --- |\n"
+        )
+        for i, r in enumerate(top_sql_all[:10]):
             if is_oracle:
-                action_items.append(
-                    f"{action_idx}. **[HIGH ELAPSED]** `{sid}` "
-                    f"(avg {avg_elapsed:.4f}s/exec). Check plan: "
-                    f"`SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR"
-                    f"('{sid}'));` — add indexes on WHERE/JOIN columns."
-                )
+                sid = r.get("sql_id", "?")
+                elapsed = _safe_float(r.get("elapsed_sec", 0) or r.get("cpu_sec", 0))
+                execs = _safe_int(r.get("executions", 0))
+                gets = _safe_int(r.get("buffer_gets", 0))
+                sql = _truncate_sql(str(r.get("sql_text", "")), 60)
             else:
-                action_items.append(
-                    f"{action_idx}. **[HIGH ELAPSED]** queryid `{sid}` "
-                    f"(avg {avg_elapsed:.4f}s/exec). Run "
-                    f"`EXPLAIN (ANALYZE, BUFFERS)` on this query and "
-                    f"add indexes on filtered columns."
+                sid = str(r.get("queryid", "?"))
+                elapsed = _safe_float(r.get("total_exec_sec", 0))
+                execs = _safe_int(r.get("calls", 0))
+                gets = _safe_int(r.get("shared_blks_hit", 0)) + _safe_int(
+                    r.get("shared_blks_read", 0)
                 )
-        parts.append("")
-
-    # --- High Execution Count SQL ---------------------------------------------
-    section_key = "high_execution_count"
-    rows = _get_rows(data, section_key)
-    parts.append("## High Execution Count SQL (> 1000 calls)")
-    if not rows:
-        parts.append("No queries exceed the 1000 execution threshold.\n")
-    else:
-        parts.append("")
-        for row in rows:
-            sid = row.get("sql_id") or row.get("queryid") or "?"
-            execs = _safe_int(row.get("executions") or row.get("calls", 0))
-            total_elapsed = _safe_float(
-                row.get("total_elapsed_sec") or row.get("total_exec_sec", 0)
+                sql = _truncate_sql(str(r.get("query_text", "")), 60)
+            detail += (
+                f"| {i + 1} | `{sid}` | {_fmt_secs(elapsed)} "
+                f"| {execs:,} | {gets:,} | `{sql}` |\n"
             )
-            sql_text = str(row.get("sql_text") or row.get("query_text") or "")
-            id_label = "sql_id" if is_oracle else "queryid"
-            parts.append(
-                f"**{id_label}: `{sid}`** — "
-                f"{execs:,} executions, total {total_elapsed:.2f}s"
+
+        if t1_sql:
+            detail += (
+                f"\n**Top #1 full query text:**\n"
+                f"```sql\n{_truncate_sql(t1_sql, 500)}\n```\n"
             )
-            if sql_text:
-                parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
-            if execs > 100000:
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[HIGH EXEC COUNT]** `{sid}` executed "
-                    f"{execs:,} times. Consider caching results, batching, "
-                    f"or reducing call frequency."
+
+        if len(top_sql_all) >= 3:
+            top3_elapsed = sum(
+                _safe_float(
+                    r.get("elapsed_sec", 0)
+                    or r.get("total_exec_sec", 0)
+                    or r.get("cpu_sec", 0)
                 )
-        parts.append("")
+                for r in top_sql_all[:3]
+            )
+            if total_elapsed > 0 and top3_elapsed / total_elapsed > 0.7:
+                pct = top3_elapsed / total_elapsed * 100
+                detail += f"\n**Top 3 queries = ~{pct:.0f}% of total execution time.**"
+
+        bottlenecks.append((1, "Query-Level Offenders (Top SQL)", detail))
+        act_idx += 1
+        actions.append(
+            (
+                0,
+                f"{act_idx}. **Review top SQL** -- "
+                f"{id_col} `{t1_id}` accounts for "
+                f"{_fmt_secs(t1_elapsed)} elapsed",
+            )
+        )
 
-    # --- Full Table Scans -----------------------------------------------------
-    fts_key = "full_table_scans" if is_oracle else "seq_scan_tables"
-    rows = _get_rows(data, fts_key)
-    parts.append("## Full Table Scans / Sequential Scans")
-    if not rows:
-        parts.append("No issues found.\n")
-    else:
-        parts.append("")
-        for row in rows:
+    # -- High elapsed per execution --
+    if high_elapsed:
+        detail = "Queries taking > 1 sec per execution:\n\n"
+        id_col = "sql_id" if is_oracle else "queryid"
+        detail += (
+            f"| {id_col} | Avg Elapsed | Total Elapsed | Execs | Query |\n"
+            f"| --- | --- | --- | --- | --- |\n"
+        )
+        for r in high_elapsed[:10]:
             if is_oracle:
-                table = row.get("table_name", "?")
-                owner = row.get("object_owner", "")
-                sid = row.get("sql_id", "?")
-                execs = _safe_int(row.get("executions", 0))
-                elapsed = _safe_float(row.get("elapsed_sec", 0))
-                gets = _safe_int(row.get("buffer_gets", 0))
-                sql_text = str(row.get("sql_text") or "")
-                parts.append(
-                    f"**Table: `{owner}.{table}`** — sql_id: `{sid}`, "
-                    f"{execs:,} execs, {elapsed:.2f}s elapsed, "
-                    f"{gets:,} buffer gets"
-                )
-                if sql_text:
-                    parts.append(f"```sql\n{_truncate_sql(sql_text, 400)}\n```")
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[FULL TABLE SCAN]** `{owner}.{table}` "
-                    f"via sql_id `{sid}`. Add index on columns in WHERE clause "
-                    f"or use hints to force index access."
-                )
+                sid = r.get("sql_id", "?")
+                avg_e = _safe_float(r.get("avg_elapsed_sec", 0))
+                tot_e = _safe_float(r.get("total_elapsed_sec", 0))
+                execs = _safe_int(r.get("executions", 0))
+                sql = _truncate_sql(str(r.get("sql_text", "")), 60)
             else:
-                table = row.get("relname", "?")
-                schema = row.get("schemaname", "public")
-                seq_scans = _safe_int(row.get("seq_scan", 0))
-                seq_tup_read = _safe_int(row.get("seq_tup_read", 0))
-                idx_scans = _safe_int(row.get("idx_scan", 0))
-                live_tup = _safe_int(row.get("n_live_tup", 0))
-                size_mb = _safe_float(row.get("table_size_mb", 0))
-                avg_rows = _safe_int(row.get("avg_rows_per_seq_scan", 0))
-                parts.append(
-                    f"**Table: `{schema}.{table}`** — "
-                    f"{seq_scans:,} seq scans ({avg_rows:,} rows/scan avg, "
-                    f"{seq_tup_read:,} rows read), "
-                    f"{idx_scans:,} idx scans, {live_tup:,} live rows, "
-                    f"{size_mb:.1f} MB"
-                )
-                if seq_scans > 100 and live_tup > 10000:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[SEQ SCAN]** `{schema}.{table}` has "
-                        f"{seq_scans:,} seq scans on {live_tup:,} rows "
-                        f"({size_mb:.1f} MB). Add indexes on frequently "
-                        f"filtered columns: "
-                        f"`CREATE INDEX ON {schema}.{table} (column_name);`"
-                    )
-        parts.append("")
+                sid = str(r.get("queryid", "?"))
+                avg_e = _safe_float(r.get("avg_elapsed_sec", 0))
+                tot_e = _safe_float(r.get("total_exec_sec", 0))
+                execs = _safe_int(r.get("calls", 0))
+                sql = _truncate_sql(str(r.get("query_text", "")), 60)
+            detail += (
+                f"| `{sid}` | {_fmt_secs(avg_e)} | {_fmt_secs(tot_e)} "
+                f"| {execs:,} | `{sql}` |\n"
+            )
+        bottlenecks.append((2, "High Elapsed Time per Execution", detail))
+        act_idx += 1
+        actions.append(
+            (
+                1,
+                f"{act_idx}. Tune slow queries -- "
+                f"{len(high_elapsed)} queries > 1 sec/exec",
+            )
+        )
 
-    # --- Execution Plans (Oracle) ---------------------------------------------
-    if is_oracle:
-        plans = data.get("execution_plans", [])
-        if isinstance(plans, list) and plans:
-            parts.append("## Execution Plans (Top SQL)")
-            parts.append("")
-            for plan in plans[:5]:
-                sid = plan.get("sql_id", "?")
-                steps = plan.get("steps", [])
-                parts.append(f"### Plan for sql_id: `{sid}`")
-                has_full_scan = False
-                has_hash_join = False
-                for step in steps[:20]:
-                    op = str(step.get("operation", ""))
-                    obj = step.get("object_name", "")
-                    cost = step.get("cost", "")
-                    est = step.get("est_rows", "")
-                    line = f"- {op}"
-                    if obj:
-                        line += f" on `{obj}`"
-                    if cost:
-                        line += f" (cost={cost}, rows={est})"
-                    parts.append(line)
-                    if "FULL" in op.upper():
-                        has_full_scan = True
-                    if "HASH JOIN" in op.upper():
-                        has_hash_join = True
-                if has_full_scan:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[PLAN]** sql_id `{sid}` has TABLE ACCESS "
-                        f"FULL in plan. Add appropriate index."
+    # -- High execution count --
+    if high_exec:
+        detail = "Queries with > 1,000 executions (high frequency):\n\n"
+        id_col = "sql_id" if is_oracle else "queryid"
+        detail += (
+            f"| {id_col} | Calls | Total Elapsed | Avg Elapsed | Query |\n"
+            f"| --- | --- | --- | --- | --- |\n"
+        )
+        for r in high_exec[:10]:
+            if is_oracle:
+                sid = r.get("sql_id", "?")
+                execs = _safe_int(r.get("executions", 0))
+                tot_e = _safe_float(r.get("total_elapsed_sec", 0))
+                avg_e = tot_e / max(execs, 1)
+                sql = _truncate_sql(str(r.get("sql_text", "")), 60)
+            else:
+                sid = str(r.get("queryid", "?"))
+                execs = _safe_int(r.get("calls", 0))
+                tot_e = _safe_float(r.get("total_exec_sec", 0))
+                avg_e = _safe_float(r.get("mean_exec_sec", 0))
+                sql = _truncate_sql(str(r.get("query_text", "")), 60)
+            detail += (
+                f"| `{sid}` | {execs:,} | {_fmt_secs(tot_e)} "
+                f"| {_fmt_secs(avg_e)} | `{sql}` |\n"
+            )
+        bottlenecks.append((2, "High Execution Count Queries", detail))
+
+    # -- Full table scans / Sequential scans --
+    if fts:
+        if is_oracle:
+            detail = "Full table scans detected:\n\n"
+            detail += (
+                "| sql_id | Table | Executions | Elapsed | Query |\n"
+                "| --- | --- | --- | --- | --- |\n"
+            )
+            for r in fts[:10]:
+                owner = r.get("object_owner", "")
+                table = r.get("table_name", "?")
+                sid = r.get("sql_id", "?")
+                execs = _safe_int(r.get("executions", 0))
+                elapsed = _safe_float(r.get("elapsed_sec", 0))
+                sql = _truncate_sql(str(r.get("sql_text", "")), 60)
+                detail += (
+                    f"| `{sid}` | `{owner}.{table}` | {execs:,} "
+                    f"| {_fmt_secs(elapsed)} | `{sql}` |\n"
+                )
+                act_idx += 1
+                actions.append(
+                    (
+                        1,
+                        f"{act_idx}. **Add index** on `{owner}.{table}` "
+                        f"for sql_id `{sid}` (full table scan, "
+                        f"{execs:,} execs, {_fmt_secs(elapsed)})",
                     )
-                if has_hash_join:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[PLAN]** sql_id `{sid}` uses HASH JOIN. "
-                        f"Ensure join columns are indexed for NESTED LOOPS "
-                        f"if table is small."
+                )
+        else:
+            detail = "Tables with heavy sequential scans:\n\n"
+            detail += (
+                "| Table | Seq Scans | Rows/Scan | Size | "
+                "Idx Scans | Live Rows |\n"
+                "| --- | --- | --- | --- | --- | --- |\n"
+            )
+            for r in fts[:10]:
+                schema = r.get("schemaname", "public")
+                table = r.get("relname", "?")
+                ss = _safe_int(r.get("seq_scan", 0))
+                avg_r = _safe_int(r.get("avg_rows_per_seq_scan", 0))
+                sz = _safe_float(r.get("table_size_mb", 0))
+                idx_s = _safe_int(r.get("idx_scan", 0))
+                live = _safe_int(r.get("n_live_tup", 0))
+                detail += (
+                    f"| `{schema}.{table}` | {ss:,} | {avg_r:,} "
+                    f"| {sz:.1f} MB | {idx_s:,} | {live:,} |\n"
+                )
+                if ss > 100 and live > 10000:
+                    act_idx += 1
+                    actions.append(
+                        (
+                            1,
+                            f"{act_idx}. **Add index** on `{schema}.{table}` -- "
+                            f"{ss:,} seq scans on {live:,} rows ({sz:.1f} MB)",
+                        )
                     )
-                parts.append("")
+        sev = 1 if len(fts) > 5 else 2
+        bottlenecks.append((sev, "Full Table Scans / Sequential Scans", detail))
+        risks.append(("Medium-High", "High", "I/O amplification from table scans"))
 
-    # --- Oracle Parallel Queries ----------------------------------------------
-    if is_oracle:
-        px_rows = _get_rows(data, "parallel_queries")
-        if px_rows:
-            parts.append("## Parallel Queries")
-            for row in px_rows:
-                sid = row.get("sql_id", "?")
-                px = _safe_int(row.get("px_servers", 0))
-                elapsed = _safe_float(row.get("elapsed_sec", 0))
-                sql_text = str(row.get("sql_text") or "")
-                parts.append(
-                    f"- **sql_id: `{sid}`** — {px:,} PX servers, {elapsed:.2f}s elapsed"
+    # -- Contention & locking --
+    if contention:
+        if is_oracle:
+            detail = "Contention/lock wait events:\n\n"
+            detail += (
+                "| Event | Waits | Time Waited | Avg Wait |\n"
+                "| --- | --- | --- | --- |\n"
+            )
+            for r in contention[:10]:
+                event = r.get("event", "?")
+                waits = _safe_int(r.get("total_waits", 0))
+                tw = _safe_float(r.get("time_waited_sec", 0))
+                aw = _safe_float(r.get("avg_wait_sec", 0))
+                detail += (
+                    f"| {event} | {waits:,} | {_fmt_secs(tw)} | {_fmt_secs(aw)} |\n"
                 )
-                if sql_text:
-                    parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
-            parts.append("")
-
-    # --- Row Contention & Locking ---------------------------------------------
-    contention_key = "row_contention" if is_oracle else "lock_waits"
-    rows = _get_rows(data, contention_key)
-    parts.append("## Row Contention & Locking")
-    if not rows:
-        parts.append("No active contention detected.\n")
-    else:
-        parts.append("")
-        for row in rows:
-            if is_oracle:
-                event = row.get("event", "?")
-                waits = _safe_int(row.get("total_waits", 0))
-                waited_sec = _safe_float(row.get("time_waited_sec", 0))
-                avg_wait = _safe_float(row.get("avg_wait_sec", 0))
-                parts.append(
-                    f"**Event: `{event}`** — {waits:,} waits, "
-                    f"{waited_sec:.2f}s total, avg {avg_wait:.4f}s/wait"
+        else:
+            detail = "Active lock waits:\n\n"
+            detail += (
+                "| PID | User | Wait Event | Running | Query |\n"
+                "| --- | --- | --- | --- | --- |\n"
+            )
+            for r in contention[:10]:
+                pid = r.get("pid", "?")
+                user = r.get("usename", "?")
+                we = f"{r.get('wait_event_type', '')}:{r.get('wait_event', '')}"
+                dur = _safe_float(r.get("running_sec", 0))
+                q = _truncate_sql(str(r.get("query", "")), 60)
+                detail += f"| {pid} | {user} | {we} | {_fmt_secs(dur)} | `{q}` |\n"
+        bottlenecks.append((2, "Row Contention & Locking", detail))
+        risks.append(("Medium", "High", "Lock escalation / deadlock risk"))
+
+    # -- Wait events --
+    if wait_rows and not contention:
+        detail = "Top wait events:\n\n"
+        detail += "| Event | Waits | Time Waited |\n| --- | --- | --- |\n"
+        for r in wait_rows[:10]:
+            event = r.get("event", r.get("event_name", "?"))
+            waits = _safe_int(r.get("total_waits", 0))
+            tw = _safe_float(r.get("time_waited_sec", 0))
+            detail += f"| {event} | {waits:,} | {_fmt_secs(tw)} |\n"
+        bottlenecks.append((2, "Top Wait Events", detail))
+
+    # -- Table sizes & bloat (PG) --
+    if table_sizes:
+        detail = "Largest tables:\n\n"
+        detail += (
+            "| Table | Total Size | Table Size | TOAST+Idx | "
+            "Live Rows | Ins | Upd | Del |\n"
+            "| --- | --- | --- | --- | --- | --- | --- | --- |\n"
+        )
+        for r in table_sizes[:10]:
+            schema = r.get("schemaname", "public")
+            table = r.get("relname", "?")
+            total = _safe_float(r.get("total_size_mb", 0))
+            tbl = _safe_float(r.get("table_size_mb", 0))
+            toast = _safe_float(r.get("toast_index_size_mb", 0))
+            live = _safe_int(r.get("n_live_tup", 0))
+            ins = _safe_int(r.get("n_tup_ins", 0))
+            upd = _safe_int(r.get("n_tup_upd", 0))
+            dele = _safe_int(r.get("n_tup_del", 0))
+            total_str = f"{total / 1024:.1f} GB" if total >= 1024 else f"{total:.0f} MB"
+            tbl_str = f"{tbl / 1024:.1f} GB" if tbl >= 1024 else f"{tbl:.0f} MB"
+            toast_str = f"{toast / 1024:.1f} GB" if toast >= 1024 else f"{toast:.0f} MB"
+            detail += (
+                f"| `{schema}.{table}` | {total_str} | {tbl_str} "
+                f"| {toast_str} | {live:,} | {ins:,} | {upd:,} | {dele:,} |\n"
+            )
+            if total > 10240:
+                risks.append(
+                    (
+                        "Medium",
+                        "Medium",
+                        f"`{schema}.{table}` is {total_str} -- consider partitioning",
+                    )
                 )
-                if waited_sec > 1:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[CONTENTION]** `{event}` — "
-                        f"{waited_sec:.2f}s total. Reduce hot-row updates, "
-                        f"increase INITRANS, or tune locking strategy."
+                act_idx += 1
+                actions.append(
+                    (
+                        1,
+                        f"{act_idx}. **Partition** `{schema}.{table}` "
+                        f"({total_str}) -- time-based or business key",
                     )
-            else:
-                pid = row.get("pid", "?")
-                user = row.get("usename", "?")
-                event = row.get("wait_event", "?")
-                event_type = row.get("wait_event_type", "")
-                running_sec = _safe_float(row.get("running_sec", 0))
-                state = row.get("state", "")
-                query = str(row.get("query") or "")
-                parts.append(
-                    f"**PID {pid}** (user: {user}, state: {state}) — "
-                    f"wait: {event_type}/{event}, running {running_sec:.2f}s"
                 )
-                if query:
-                    parts.append(f"```sql\n{_truncate_sql(query, 300)}\n```")
-                if running_sec > 60:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[LONG WAIT]** PID {pid} waiting on "
-                        f"{event_type}/{event} for {running_sec:.0f}s. "
-                        f"Consider `SELECT pg_cancel_backend({pid});`"
+            if toast > tbl and toast > 1024:
+                act_idx += 1
+                actions.append(
+                    (
+                        2,
+                        f"{act_idx}. Review TOAST usage on `{schema}.{table}` "
+                        f"-- TOAST+Idx ({toast_str}) > table ({tbl_str})",
                     )
-        parts.append("")
-
-    # --- Wait Events (Oracle / AWR) -------------------------------------------
-    if is_oracle:
-        wait_rows = _get_rows(data, "wait_events") or _get_rows(data, "awr_wait_events")
-        if wait_rows:
-            parts.append("## Top Wait Events")
-            parts.append("")
-            for row in wait_rows[:15]:
-                event = row.get("event", "?")
-                waits = _safe_int(row.get("total_waits", 0))
-                waited = _safe_float(row.get("time_waited_sec", 0))
-                avg_w = _safe_float(row.get("avg_wait_sec", 0))
-                line = f"- **`{event}`** — {waits:,} waits, {waited:.2f}s total"
-                if avg_w > 0:
-                    line += f", avg {avg_w:.4f}s"
-                parts.append(line)
-                if waited > 60:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[WAIT]** `{event}` — "
-                        f"{waited:.2f}s total wait time. "
-                        f"Investigate root cause (I/O, lock, latch)."
+                )
+        bottlenecks.append((2, "Table Sizes & Storage", detail))
+
+    # -- Bloat (PG) --
+    if bloat_rows:
+        high_bloat = [r for r in bloat_rows if _safe_float(r.get("dead_pct", 0)) > 20]
+        if high_bloat:
+            detail = "Tables with significant bloat (dead tuples > 20%):\n\n"
+            detail += (
+                "| Table | Dead % | Dead Tuples | Size | Last Vacuum |\n"
+                "| --- | --- | --- | --- | --- |\n"
+            )
+            for r in high_bloat[:10]:
+                schema = r.get("schemaname", "public")
+                table = r.get("relname", "?")
+                dp = _safe_float(r.get("dead_pct", 0))
+                dead = _safe_int(r.get("n_dead_tup", 0))
+                sz = _safe_float(r.get("table_size_mb", 0))
+                lv = r.get("last_autovacuum", "never") or "never"
+                detail += (
+                    f"| `{schema}.{table}` | {dp:.1f}% | {dead:,} "
+                    f"| {sz:.0f} MB | {lv} |\n"
+                )
+                act_idx += 1
+                actions.append(
+                    (
+                        1,
+                        f"{act_idx}. **VACUUM FULL** `{schema}.{table}` -- "
+                        f"{dp:.1f}% dead tuples ({dead:,}): "
+                        f"`VACUUM (VERBOSE, ANALYZE) {schema}.{table};`",
                     )
-            parts.append("")
-
-    # --- pgProfile Wait Events ------------------------------------------------
-    if not is_oracle:
-        pgp_wait_rows = _get_rows(data, "pgprofile_wait_events")
-        if pgp_wait_rows:
-            parts.append("## Wait Events (pgProfile)")
-            for row in pgp_wait_rows[:15]:
-                etype = row.get("event_type", "?")
-                event = row.get("event", "?")
-                waits = _safe_int(row.get("total_waits", 0))
-                waited = _safe_float(row.get("total_waited_sec", 0))
-                parts.append(f"- **{etype}/{event}** — {waits:,} waits, {waited:.2f}s")
-            parts.append("")
+                )
+            bottlenecks.append((1, "Table Bloat", detail))
+            risks.append(("High", "Severe", "Disk exhaustion from bloat"))
 
-    # --- Sequence Caching Issues -----------------------------------------------
-    seq_key = "sequence_no_cache" if is_oracle else "sequence_cache_issues"
-    rows = _get_rows(data, seq_key)
-    parts.append("## Sequence Caching Issues")
-    if not rows:
-        parts.append("No issues found.\n")
-    else:
-        parts.append("")
-        for row in rows:
+    # -- Stale statistics / missing vacuum --
+    stale_critical: list[dict[str, Any]] = []
+    if stale_rows:
+        for r in stale_rows:
             if is_oracle:
-                owner = row.get("sequence_owner", "")
-                name = row.get("sequence_name", "?")
-                cache = _safe_int(row.get("cache_size", 0))
-                parts.append(
-                    f"**`{owner}.{name}`** — cache_size={cache} (should be >= 20)"
-                )
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[SEQUENCE]** "
-                    f"`ALTER SEQUENCE {owner}.{name} CACHE 20;`"
-                )
+                stale = r.get("stale_stats", "")
+                days = _safe_float(r.get("days_since_analyzed", 0))
+                if stale == "YES" or days > 7:
+                    stale_critical.append(r)
             else:
-                schema = row.get("schemaname", "public")
-                name = row.get("sequencename", "?")
-                cache = _safe_int(row.get("cache_size") or 0)
-                parts.append(
-                    f"**`{schema}.{name}`** — cache_size={cache} (should be >= 20)"
+                dead_pct = _safe_float(r.get("dead_pct", 0))
+                la = r.get("last_analyze") or r.get("last_autoanalyze")
+                if dead_pct > 10 or not la:
+                    stale_critical.append(r)
+    if stale_critical:
+        detail = "Tables with stale/missing statistics:\n\n"
+        if is_oracle:
+            detail += (
+                "| Table | Rows | Last Analyzed | Days Stale |\n"
+                "| --- | --- | --- | --- |\n"
+            )
+            for r in stale_critical[:15]:
+                table = r.get("table_name", "?")
+                rows = _safe_int(r.get("num_rows", 0))
+                la = r.get("last_analyzed", "never")
+                days = _safe_float(r.get("days_since_analyzed", 0))
+                detail += f"| `{table}` | {rows:,} | {la} | {days:.0f} |\n"
+                act_idx += 1
+                actions.append(
+                    (
+                        2,
+                        f"{act_idx}. `EXEC DBMS_STATS.GATHER_TABLE_STATS"
+                        f"(ownname=>USER, tabname=>'{table}');`",
+                    )
                 )
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[SEQUENCE]** "
-                    f"`ALTER SEQUENCE {schema}.{name} CACHE 20;`"
+        else:
+            detail += (
+                "| Table | Dead % | Dead Tuples | Last Analyze |\n"
+                "| --- | --- | --- | --- |\n"
+            )
+            for r in stale_critical[:15]:
+                schema = r.get("schemaname", "public")
+                table = r.get("relname", "?")
+                dp = _safe_float(r.get("dead_pct", 0))
+                dead = _safe_int(r.get("n_dead_tup", 0))
+                la = r.get("last_analyze") or r.get("last_autoanalyze") or "never"
+                detail += f"| `{schema}.{table}` | {dp:.1f}% | {dead:,} | {la} |\n"
+                act_idx += 1
+                actions.append((2, f"{act_idx}. `ANALYZE {schema}.{table};`"))
+        bottlenecks.append((2, "Stale Statistics / Missing Vacuum", detail))
+
+    # -- Unused indexes --
+    if unused_idx:
+        total_waste_mb = sum(_safe_float(r.get("index_size_mb", 0)) for r in unused_idx)
+        detail = (
+            f"**{len(unused_idx)} unused indexes** "
+            f"consuming **{total_waste_mb:.0f} MB**:\n\n"
+            "| Index | Table | Size |\n"
+            "| --- | --- | --- |\n"
+        )
+        for r in unused_idx[:15]:
+            if is_oracle:
+                idx = r.get("index_name", "?")
+                table = r.get("table_name", "?")
+                sz = _safe_float(r.get("index_rows", 0))
+                detail += f"| `{idx}` | `{table}` | {sz:,} rows |\n"
+            else:
+                schema = r.get("schemaname", "public")
+                idx = r.get("indexrelname", "?")
+                table = r.get("relname", "?")
+                sz = _safe_float(r.get("index_size_mb", 0))
+                detail += f"| `{schema}.{idx}` | `{table}` | {sz:.0f} MB |\n"
+                act_idx += 1
+                actions.append(
+                    (
+                        2,
+                        f"{act_idx}. `DROP INDEX {schema}.{idx};` "
+                        f"-- never used, {sz:.0f} MB",
+                    )
                 )
-        parts.append("")
+        bottlenecks.append((3, "Unused Indexes", detail))
 
-    # --- Stale Statistics / Vacuum / Bloat ------------------------------------
-    if is_oracle:
-        rows = _get_rows(data, "stale_statistics")
-    else:
-        rows = _get_rows(data, "stale_stats_vacuum") + _get_rows(data, "bloat_estimate")
-        seen_tables: set[str] = set()
-        deduped: list[dict[str, Any]] = []
-        for r in rows:
-            key = f"{r.get('schemaname', '')}.{r.get('relname', '')}"
-            if key not in seen_tables:
-                seen_tables.add(key)
-                deduped.append(r)
-        rows = deduped
-
-    parts.append("## Stale Statistics / Vacuum / Bloat")
-    if not rows:
-        parts.append("No issues found.\n")
-    else:
-        parts.append("")
-        for row in rows:
+    # -- Sequence caching --
+    if seqs:
+        detail = "Sequences with no/low caching (cache_size <= 1):\n\n"
+        detail += "| Sequence | Cache Size |\n| --- | --- |\n"
+        for r in seqs[:15]:
             if is_oracle:
-                table = row.get("table_name", "?")
-                num_rows = _safe_int(row.get("num_rows", 0))
-                stale = row.get("stale_stats", "?")
-                last_analyzed = row.get("last_analyzed", "never")
-                days = _safe_float(row.get("days_since_analyzed", 0))
-                parts.append(
-                    f"**`{table}`** — {num_rows:,} rows, stale={stale}, "
-                    f"last analyzed: {last_analyzed} ({days:.0f} days ago)"
-                )
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[STALE STATS]** "
-                    f"`EXEC DBMS_STATS.GATHER_TABLE_STATS"
-                    f"(ownname=>USER, tabname=>'{table}');`"
-                )
+                name = f"{r.get('sequence_owner', '')}.{r.get('sequence_name', '?')}"
+                cache = _safe_int(r.get("cache_size", 0))
             else:
-                schema = row.get("schemaname", "public")
-                table = row.get("relname", "?")
-                dead = _safe_int(row.get("n_dead_tup", 0))
-                live = _safe_int(row.get("n_live_tup", 0))
-                dead_pct = _safe_float(row.get("dead_pct", 0))
-                last_vac = (
-                    row.get("last_autovacuum") or row.get("last_vacuum") or "never"
-                )
-                last_analyze = (
-                    row.get("last_autoanalyze") or row.get("last_analyze") or "never"
-                )
-                parts.append(
-                    f"**`{schema}.{table}`** — {live:,} live, {dead:,} dead "
-                    f"({dead_pct:.1f}% bloat), last vacuum: {last_vac}, "
-                    f"last analyze: {last_analyze}"
-                )
-                if dead_pct > 20 or dead > 50000:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[BLOAT]** "
-                        f"`VACUUM ANALYZE {schema}.{table};` "
-                        f"— {dead_pct:.1f}% dead tuples"
-                    )
-                elif str(last_analyze) in ("never", "None"):
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[STALE STATS]** "
-                        f"`ANALYZE {schema}.{table};` — never analyzed"
-                    )
-        parts.append("")
+                name = f"{r.get('schemaname', 'public')}.{r.get('sequencename', '?')}"
+                cache = _safe_int(r.get("cache_size", 0))
+            detail += f"| `{name}` | {cache} |\n"
+        detail += (
+            "\n**Fix:** Increase cache size to reduce contention:\n"
+            "```sql\nALTER SEQUENCE seq_name CACHE 100;\n```"
+        )
+        bottlenecks.append((3, "Sequence Caching Issues", detail))
+
+    # -- Temp file usage (PG) --
+    if temp_sql_rows:
+        detail = "Queries spilling to temp files:\n\n"
+        detail += (
+            "| queryid | Temp MB | Calls | Elapsed | Query |\n"
+            "| --- | --- | --- | --- | --- |\n"
+        )
+        for r in temp_sql_rows[:10]:
+            qid = str(r.get("queryid", "?"))
+            tmb = _safe_float(r.get("temp_mb", 0))
+            calls = _safe_int(r.get("calls", 0))
+            elapsed = _safe_float(r.get("total_exec_sec", 0))
+            sql = _truncate_sql(str(r.get("query_text", "")), 60)
+            detail += (
+                f"| `{qid}` | {tmb:.1f} | {calls:,} "
+                f"| {_fmt_secs(elapsed)} | `{sql}` |\n"
+            )
+        detail += "\n**Fix:** Increase `work_mem` or optimise query to reduce sorting."
+        bottlenecks.append((2, "Temp File Usage", detail))
+        act_idx += 1
+        actions.append(
+            (
+                2,
+                f"{act_idx}. Increase `work_mem` -- "
+                f"{len(temp_sql_rows)} queries spilling to disk",
+            )
+        )
 
-    # --- Unused Indexes -------------------------------------------------------
-    rows = _get_rows(data, "unused_indexes")
-    if rows:
-        parts.append("## Unused Indexes")
-        parts.append("")
-        for row in rows:
-            schema = row.get("schemaname", "public")
-            table = row.get("relname", "?")
-            idx_name = row.get("indexrelname", "?")
-            size_mb = _safe_float(row.get("index_size_mb", 0))
-            parts.append(
-                f"**`{schema}.{idx_name}`** on `{table}` — {size_mb:.1f} MB, 0 scans"
+    # -- Checkpoint issues (PG) --
+    if ckpt_rows:
+        ck = ckpt_rows[0]
+        req = _safe_int(ck.get("checkpoints_req", 0))
+        timed = _safe_int(ck.get("checkpoints_timed", 0))
+        buffers_ckpt = _safe_int(ck.get("buffers_checkpoint", 0))
+        buffers_be = _safe_int(ck.get("buffers_backend", 0))
+        backend_pct = 0.0
+        if buffers_ckpt + buffers_be > 0:
+            backend_pct = buffers_be / (buffers_ckpt + buffers_be) * 100
+        if req > timed and timed > 0:
+            detail = (
+                f"Requested checkpoints ({req:,}) **exceed** timed "
+                f"checkpoints ({timed:,})\n\n"
+                f"Backend write %: {backend_pct:.1f}%\n\n"
+                "**Fix:** Increase `max_wal_size` and `checkpoint_timeout`."
             )
-            if size_mb > 1:
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[UNUSED INDEX]** "
-                    f"`DROP INDEX {schema}.{idx_name};` — "
-                    f"{size_mb:.1f} MB wasted"
+            bottlenecks.append((2, "Checkpoint Pressure", detail))
+            act_idx += 1
+            actions.append(
+                (
+                    1,
+                    f"{act_idx}. Increase `max_wal_size` -- "
+                    f"requested checkpoints ({req:,}) > timed ({timed:,})",
                 )
-        parts.append("")
-
-    # --- Table Stats (PostgreSQL) — top tables by activity --------------------
-    if not is_oracle:
-        tbl_rows = _get_rows(data, "table_stats")
-        if tbl_rows:
-            parts.append("## Top Tables by Activity")
-            parts.append("")
-            for row in tbl_rows[:10]:
-                schema = row.get("schemaname", "public")
-                table = row.get("relname", "?")
-                seq_scan = _safe_int(row.get("seq_scan", 0))
-                idx_scan = _safe_int(row.get("idx_scan", 0))
-                inserts = _safe_int(row.get("n_tup_ins", 0))
-                updates = _safe_int(row.get("n_tup_upd", 0))
-                deletes = _safe_int(row.get("n_tup_del", 0))
-                live = _safe_int(row.get("n_live_tup", 0))
-                dead = _safe_int(row.get("n_dead_tup", 0))
-                parts.append(
-                    f"- **`{schema}.{table}`** — seq: {seq_scan:,}, "
-                    f"idx: {idx_scan:,}, ins/upd/del: "
-                    f"{inserts:,}/{updates:,}/{deletes:,}, "
-                    f"live: {live:,}, dead: {dead:,}"
+            )
+        if backend_pct > 20:
+            detail_be = (
+                f"**{backend_pct:.1f}%** of buffers written by backends "
+                f"(should be < 5%)\n\n"
+                "**Fix:** Increase `shared_buffers`, tune `bgwriter_*` params."
+            )
+            bottlenecks.append((2, "Backend Buffer Writes", detail_be))
+
+    # -- Replication lag --
+    if repl_rows:
+        for r in repl_rows:
+            replay_lag = _safe_float(r.get("replay_lag_sec", 0))
+            client = r.get("client_addr", "?")
+            state = r.get("state", "?")
+            if replay_lag > 10:
+                bottlenecks.append(
+                    (
+                        1 if replay_lag > 60 else 2,
+                        f"Replication Lag ({client})",
+                        f"Replica `{client}` ({state}): "
+                        f"replay lag = **{_fmt_secs(replay_lag)}**",
+                    )
+                )
+                risks.append(
+                    (
+                        "Medium-High",
+                        "High",
+                        f"Replication lag {_fmt_secs(replay_lag)} on {client}",
+                    )
                 )
-            parts.append("")
 
-    # --- Checkpoint / WAL Issues (PostgreSQL) ---------------------------------
-    if not is_oracle:
-        cp_rows = _get_rows(data, "checkpoint_stats")
-        parts.append("## Checkpoint / WAL Issues")
-        has_issue = False
-        if cp_rows:
-            row = cp_rows[0]
-            backend_pct = _safe_float(row.get("backend_write_pct", 0))
-            req = _safe_int(row.get("checkpoints_req", 0))
-            timed = _safe_int(row.get("checkpoints_timed", 0))
-            buf_cp = _safe_int(row.get("buffers_checkpoint", 0))
-            buf_clean = _safe_int(row.get("buffers_clean", 0))
-            buf_backend = _safe_int(row.get("buffers_backend", 0))
-            parts.append(
-                f"- Checkpoints: {timed:,} timed, {req:,} requested\n"
-                f"- Buffers: checkpoint={buf_cp:,}, clean={buf_clean:,}, "
-                f"backend={buf_backend:,}\n"
-                f"- Backend write %: {backend_pct:.1f}%"
+    # -- Oracle SGA info --
+    sga_rows = _get_rows(data, "sga_info")
+    if sga_rows:
+        detail = "SGA Memory Allocation:\n\n"
+        detail += "| Component | Size |\n| --- | --- |\n"
+        for r in sga_rows:
+            name = r.get("name", "?")
+            sz = _safe_float(r.get("size_mb", 0))
+            sz_str = f"{sz / 1024:.1f} GB" if sz >= 1024 else f"{sz:.0f} MB"
+            detail += f"| {name} | {sz_str} |\n"
+        bottlenecks.append((3, "SGA Configuration", detail))
+
+    # -- Oracle tablespace I/O --
+    ts_io_rows = _get_rows(data, "tablespace_io")
+    if ts_io_rows:
+        detail = "Tablespace I/O:\n\n"
+        detail += (
+            "| Tablespace | Phys Reads | Phys Writes | "
+            "Read Time | Write Time |\n"
+            "| --- | --- | --- | --- | --- |\n"
+        )
+        for r in ts_io_rows[:10]:
+            ts = r.get("tablespace_name", "?")
+            pr = _safe_int(r.get("physical_reads", 0))
+            pw = _safe_int(r.get("physical_writes", 0))
+            rt = _safe_float(r.get("read_time_sec", 0))
+            wt = _safe_float(r.get("write_time_sec", 0))
+            detail += (
+                f"| {ts} | {pr:,} | {pw:,} | {_fmt_secs(rt)} | {_fmt_secs(wt)} |\n"
             )
-            if backend_pct > 10:
-                has_issue = True
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[CHECKPOINT]** Backend writes are "
-                    f"{backend_pct:.1f}% of total — increase "
-                    f"`shared_buffers` and `checkpoint_completion_target`."
+        bottlenecks.append((3, "Tablespace I/O", detail))
+
+    # -- Oracle redo log switches --
+    redo_rows = _get_rows(data, "redo_log_switches")
+    if redo_rows:
+        max_switches = max(_safe_int(r.get("switches", 0)) for r in redo_rows)
+        if max_switches > 10:
+            detail = "Redo log switches per hour:\n\n"
+            detail += "| Hour | Switches |\n| --- | --- |\n"
+            for r in redo_rows[:12]:
+                detail += (
+                    f"| {r.get('switch_hour', '?')} "
+                    f"| {_safe_int(r.get('switches', 0))} |\n"
                 )
-            if req > timed and timed > 0:
-                has_issue = True
-                action_idx += 1
-                action_items.append(
-                    f"{action_idx}. **[CHECKPOINT]** More requested ({req:,}) "
-                    f"than timed ({timed:,}) checkpoints — increase "
-                    f"`max_wal_size`."
+            detail += (
+                f"\nPeak: **{max_switches} switches/hour** -- "
+                f"consider increasing redo log size."
+            )
+            bottlenecks.append((2, "High Redo Log Switches", detail))
+            act_idx += 1
+            actions.append(
+                (
+                    1,
+                    f"{act_idx}. Increase redo log size -- "
+                    f"peak {max_switches} switches/hour",
                 )
-        if not has_issue:
-            parts.append("No issues found.")
-        parts.append("")
+            )
 
-    # --- Temp File Usage (PostgreSQL) -----------------------------------------
-    if not is_oracle:
-        rows = _get_rows(data, "temp_file_usage")
-        if rows:
-            parts.append("## Temp File Usage")
-            parts.append("")
-            for row in rows[:10]:
-                sid = row.get("queryid", "?")
-                temp_mb = _safe_float(row.get("temp_mb", 0))
-                calls = _safe_int(row.get("calls", 0))
-                sql_text = str(row.get("query_text") or "")
-                parts.append(
-                    f"**queryid: `{sid}`** — {temp_mb:.1f} MB temp, {calls:,} calls"
-                )
-                if sql_text:
-                    parts.append(f"```sql\n{_truncate_sql(sql_text, 300)}\n```")
-                if temp_mb > 100:
-                    action_idx += 1
-                    action_items.append(
-                        f"{action_idx}. **[TEMP FILES]** queryid `{sid}` uses "
-                        f"{temp_mb:.1f} MB temp. Increase `work_mem` or "
-                        f"optimize sort/join."
+    # -- Oracle temp usage --
+    temp_rows = _get_rows(data, "temp_usage")
+    if temp_rows:
+        for r in temp_rows:
+            pct = _safe_float(r.get("pct_used", 0))
+            if pct > 80:
+                ts = r.get("tablespace_name", "?")
+                used = _safe_float(r.get("used_mb", 0))
+                free = _safe_float(r.get("free_mb", 0))
+                bottlenecks.append(
+                    (
+                        2,
+                        f"Temp Tablespace `{ts}` at {pct:.0f}%",
+                        f"Used: {used:.0f} MB, Free: {free:.0f} MB",
                     )
-            parts.append("")
+                )
+                risks.append(("Medium", "High", f"Temp space exhaustion on {ts}"))
 
     # =====================================================================
-    # EXECUTIVE SUMMARY & ACTION PLAN
+    # PHASE 3 -- Generate formatted report
     # =====================================================================
-    summary_parts: list[str] = []
-    high_elapsed = _get_rows(data, "high_elapsed_per_exec")
-    high_exec = _get_rows(data, "high_execution_count")
-    fts = _get_rows(data, "full_table_scans" if is_oracle else "seq_scan_tables")
-    contention = _get_rows(data, "row_contention" if is_oracle else "lock_waits")
-    seqs = _get_rows(
-        data, "sequence_no_cache" if is_oracle else "sequence_cache_issues"
-    )
-    top_sql = (
-        _get_rows(data, "top_cpu_sql" if is_oracle else "top_cpu_queries")
-        or _get_rows(data, "awr_top_sql")
-        or _get_rows(data, "pgprofile_top_sql")
-    )
+    parts: list[str] = []
 
-    if top_sql:
-        summary_parts.append(f"{len(top_sql)} top SQL statements analysed")
-    if high_elapsed:
-        summary_parts.append(
-            f"{len(high_elapsed)} queries with high elapsed time per execution"
+    # --- Header ---
+    parts.append(f"# Performance Analysis Report -- {db_type.upper()}")
+    parts.append("*Programmatic analysis v2 -- no LLM involved*\n")
+    parts.append("---\n")
+
+    # --- 1. Executive Summary ---
+    sev1 = [b for b in bottlenecks if b[0] == 1]
+    sev2 = [b for b in bottlenecks if b[0] == 2]
+    sev3 = [b for b in bottlenecks if b[0] == 3]
+
+    if sev1:
+        health = "CRITICAL -- immediate action required"
+    elif sev2:
+        health = "WARNING -- important issues found"
+    elif sev3:
+        health = "ADVISORY -- minor improvements possible"
+    else:
+        health = "HEALTHY -- no significant issues detected"
+
+    parts.append("## 1. Executive Summary\n")
+    parts.append(f"**Overall health:** {health}\n")
+
+    headlines: list[str] = []
+    if not is_oracle:
+        headlines.append(f"Buffer cache hit ratio: **{cache_hit:.2f}%**")
+        headlines.append(f"Active backends: **{backends}**")
+        headlines.append(
+            f"Transactions: **{commits:,}** commits, **{rollbacks:,}** rollbacks"
         )
-    if high_exec:
-        summary_parts.append(
-            f"{len(high_exec)} queries with very high execution counts"
+        if wal_bytes > 0:
+            headlines.append(f"WAL generated: **{_fmt_bytes(wal_bytes)}**")
+        if temp_bytes > 0:
+            headlines.append(
+                f"Temp files: **{temp_files:,}** files, **{_fmt_bytes(temp_bytes)}**"
+            )
+    else:
+        headlines.append(f"Buffer cache hit ratio: **{ora_cache_hit:.2f}%**")
+        headlines.append(f"Hard parse ratio: **{ora_hard_parse_pct:.1f}%**")
+        if ora_commit_count + ora_rb_count > 0:
+            headlines.append(
+                f"Transactions: **{ora_commit_count:,}** commits, "
+                f"**{ora_rb_count:,}** rollbacks"
+            )
+    headlines.append(
+        f"Issues found: **{len(sev1)}** critical, "
+        f"**{len(sev2)}** important, **{len(sev3)}** advisory"
+    )
+    for h in headlines:
+        parts.append(f"- {h}")
+    parts.append("")
+
+    # --- 2. Database & Workload Overview ---
+    parts.append("## 2. Database & Workload Overview\n")
+    if not is_oracle:
+        parts.append(
+            "| Metric | Value |\n"
+            "| --- | --- |\n"
+            f"| Cache hit ratio | {cache_hit:.2f}% |\n"
+            f"| Active backends | {backends} |\n"
+            f"| Commits | {commits:,} |\n"
+            f"| Rollbacks | {rollbacks:,} |\n"
+            f"| Blocks hit | {blks_hit:,} |\n"
+            f"| Blocks read (disk) | {blks_read:,} |\n"
+            f"| Temp files | {temp_files:,} |\n"
+            f"| Temp bytes | {_fmt_bytes(temp_bytes)} |"
         )
-    if fts:
-        summary_parts.append(
-            f"{len(fts)} "
-            f"{'full table scans' if is_oracle else 'tables with heavy seq scans'}"
+        if wal_bytes > 0:
+            parts.append(
+                f"| WAL generated | {_fmt_bytes(wal_bytes)} |\n"
+                f"| WAL FPI count | {wal_fpi:,} |\n"
+                f"| WAL sync time | {wal_sync_time_ms / 1000:.1f} sec |\n"
+                f"| WAL write time | {wal_write_time_ms / 1000:.1f} sec |"
+            )
+    else:
+        parts.append(
+            "| Metric | Value |\n"
+            "| --- | --- |\n"
+            f"| Buffer cache hit | {ora_cache_hit:.2f}% |\n"
+            f"| Hard parse ratio | {ora_hard_parse_pct:.1f}% |\n"
+            f"| Disk sort ratio | {ora_disk_sort_pct:.1f}% |\n"
+            f"| Commits | {ora_commit_count:,} |\n"
+            f"| Rollbacks | {ora_rb_count:,} |"
         )
-    if contention:
-        summary_parts.append(f"{len(contention)} contention/lock wait events")
-    if seqs:
-        summary_parts.append(f"{len(seqs)} sequences with no/low caching")
+    parts.append("")
 
-    exec_summary = (
-        "Found: " + "; ".join(summary_parts) + "."
-        if summary_parts
-        else "No significant performance issues detected in the collected data."
-    )
+    # Connection distribution
+    if conn_rows:
+        parts.append("**Connection Distribution:**\n")
+        parts.append("| State | Count | Wait Type |\n| --- | --- | --- |")
+        for r in conn_rows:
+            state = r.get("state", "unknown") or "null"
+            count = _safe_int(r.get("count", 0))
+            wtype = r.get("wait_event_type", "None")
+            parts.append(f"| {state} | {count} | {wtype} |")
+        if idle_count > 50:
+            act_idx += 1
+            actions.append(
+                (
+                    1,
+                    f"{act_idx}. Use connection pooling (PgBouncer) -- "
+                    f"{idle_count} idle connections",
+                )
+            )
+        parts.append("")
 
-    # Build final report: summary at top, then sections, then action plan
-    header = [f"## Executive Summary\n{exec_summary}\n"]
-    footer = ["\n## Action Plan (Priority Order)\n"]
-    if action_items:
-        footer.extend(action_items)
+    # --- 3. Top Bottlenecks ---
+    parts.append("## 3. Top Bottlenecks\n")
+    if not bottlenecks:
+        parts.append(
+            "No significant performance bottlenecks detected in the collected data.\n"
+        )
     else:
-        footer.append(
-            "No critical action items — database appears healthy based "
-            "on collected data."
+        sev_label = {
+            1: "SEV-1 (Critical)",
+            2: "SEV-2 (Important)",
+            3: "SEV-3 (Advisory)",
+        }
+        sev_emoji = {1: "SEV-1", 2: "SEV-2", 3: "SEV-3"}
+        bn_idx = 0
+        for sev in (1, 2, 3):
+            group = [b for b in bottlenecks if b[0] == sev]
+            if not group:
+                continue
+            for _, title, detail in group:
+                bn_idx += 1
+                parts.append(
+                    f"### {sev_emoji.get(sev, '')} {bn_idx}. "
+                    f"{title} ({sev_label[sev]})\n"
+                )
+                parts.append(detail)
+                parts.append("")
+
+    # --- 4. Configuration Review ---
+    section_num = 4
+    if config_rows:
+        parts.append(f"## {section_num}. Configuration Review\n")
+        if is_oracle:
+            parts.append("| Parameter | Value | Description |\n| --- | --- | --- |")
+            for r in config_rows:
+                name = r.get("name", "?")
+                val = r.get("value", "?")
+                desc = _truncate_sql(str(r.get("description", "")), 80)
+                parts.append(f"| `{name}` | `{val}` | {desc} |")
+        else:
+            parts.append("| Parameter | Value | Unit |\n| --- | --- | --- |")
+            risky_params: dict[str, str] = {}
+            for r in config_rows:
+                name = r.get("name", "?")
+                val = r.get("setting", "?")
+                unit = r.get("unit", "") or ""
+                parts.append(f"| `{name}` | `{val}` | {unit} |")
+                if name == "statement_timeout" and str(val) == "0":
+                    risky_params[name] = (
+                        "No statement timeout -- risk of runaway queries"
+                    )
+                if name == "idle_in_transaction_session_timeout" and str(val) == "0":
+                    risky_params[name] = "No idle-in-tx timeout -- risk of bloat"
+                if name == "max_connections":
+                    max_conn = _safe_int(val)
+                    if max_conn > 500:
+                        risky_params[name] = (
+                            f"max_connections={max_conn} is high -- "
+                            f"use connection pooling"
+                        )
+            if risky_params:
+                parts.append("\n**Risks:**")
+                for param, msg in risky_params.items():
+                    parts.append(f"- `{param}`: {msg}")
+                    risks.append(("Medium", "Medium", msg))
+        parts.append("")
+        section_num += 1
+
+    # --- 5. Risk Register ---
+    if risks:
+        parts.append(f"## {section_num}. Risk Register\n")
+        parts.append("| Risk | Likelihood | Impact |\n| --- | --- | --- |")
+        seen_risks: set[str] = set()
+        for likelihood, impact, desc in risks:
+            if desc not in seen_risks:
+                seen_risks.add(desc)
+                parts.append(f"| {desc} | {likelihood} | {impact} |")
+        parts.append("")
+        section_num += 1
+
+    # --- 6. Prioritised Action Plan ---
+    parts.append(f"## {section_num}. Prioritised Action Plan\n")
+    if not actions:
+        parts.append(
+            "No critical action items -- database appears healthy "
+            "based on collected data."
         )
+    else:
+        p0 = [a for a in actions if a[0] == 0]
+        p1 = [a for a in actions if a[0] == 1]
+        p2 = [a for a in actions if a[0] == 2]
+        if p0:
+            parts.append("### Priority 0 -- Immediate (this sprint)\n")
+            for _, text in p0:
+                parts.append(text)
+            parts.append("")
+        if p1:
+            parts.append("### Priority 1 -- Structural\n")
+            for _, text in p1:
+                parts.append(text)
+            parts.append("")
+        if p2:
+            parts.append("### Priority 2 -- Performance Hygiene\n")
+            for _, text in p2:
+                parts.append(text)
+            parts.append("")
 
-    return "\n".join(header + parts + footer)
+    return "\n".join(parts)
 
 
 def _get_rows(data: dict[str, Any], key: str) -> list[dict[str, Any]]:
@@ -1752,9 +2178,9 @@ def analyse_awr_snaps(self, begin_snap: int, end_snap: int) -> dict[str, Any]:
     def analyse_uploaded_report(
         self, file_content: str, file_name: str
     ) -> dict[str, Any]:
-        """Parse an uploaded report file and generate LLM analysis."""
+        """Parse an uploaded report file and display it."""
         parsed = parse_uploaded_report(file_content, file_name)
-        return self._run_llm_analysis_from_text(parsed)
+        return self._run_uploaded_report_analysis(parsed)
 
     def list_awr_snapshots(self) -> list[dict[str, Any]]:
         """Return available AWR snapshots from DBA_HIST_SNAPSHOT."""
@@ -1804,23 +2230,19 @@ def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
             "analysis": findings_report,
         }
 
-    def _run_llm_analysis_from_text(self, report_text: str) -> dict[str, Any]:
-        # For uploaded reports, we still need the LLM since we don't
-        # have structured data — but we keep the prompt minimal.
-        llm_prompt = (
-            report_text + "\n\n---\n"
-            "Summarise the key performance issues in the report above. "
-            "Only reference data that actually appears above. "
-            "Do NOT invent sql_ids, table names, or metrics."
-        )
-        try:
-            llm_response = self.llm_client.generate(prompt=llm_prompt)
-        except (ConnectionError, RuntimeError) as exc:
-            llm_response = f"LLM analysis failed: {exc}"
+    def _run_uploaded_report_analysis(self, report_text: str) -> dict[str, Any]:
+        # For uploaded reports we cannot do structured analysis.
+        # Display the parsed text as-is — no LLM involved.
         return {
             "raw_data": {},
             "report_text": report_text,
-            "analysis": llm_response,
+            "analysis": (
+                "## Uploaded Report\n\n"
+                "The parsed report content is shown below. "
+                "For detailed programmatic analysis, use **Live** mode "
+                "which queries the database directly.\n\n"
+                "---\n\n" + report_text[:8000]
+            ),
         }
 
     # -- Oracle collection ---------------------------------------------------
@@ -1844,6 +2266,8 @@ def _collect_oracle(self) -> dict[str, Any]:
             "redo_log_switches": _ORA_REDO_LOG_SWITCHES,
             "temp_usage": _ORA_TEMP_USAGE,
             "parallel_queries": _ORA_PARALLEL_QUERIES,
+            "config_params": _ORA_CONFIG_PARAMS,
+            "idle_sessions": _ORA_IDLE_SESSIONS,
         }
         for name, sql in queries.items():
             result = self.db_client.execute_query(sql)
@@ -1958,6 +2382,7 @@ def _collect_postgresql(self) -> dict[str, Any]:
             "existing_indexes": _PG_EXISTING_INDEXES,
             "stale_stats_vacuum": _PG_STALE_STATS,
             "table_stats": _PG_TABLE_STATS,
+            "table_sizes": _PG_TABLE_SIZES,
             "database_stats": _PG_DB_STATS,
             "bgwriter_stats": bgwriter_sql,
             "unused_indexes": _PG_UNUSED_INDEXES,
@@ -1967,7 +2392,13 @@ def _collect_postgresql(self) -> dict[str, Any]:
             "temp_file_usage": _PG_TEMP_FILE_USAGE,
             "connection_stats": _PG_CONNECTION_STATS,
             "checkpoint_stats": checkpoint_sql,
+            "idle_in_transaction": _PG_IDLE_IN_TRANSACTION,
+            "config_params": _PG_CONFIG_PARAMS,
+            "replication_status": _PG_REPLICATION_STATUS,
         }
+        # WAL stats only available in PG 14+
+        if pg_major >= 14:
+            queries["wal_stats"] = _PG_WAL_STATS
         for name, sql in queries.items():
             result = self.db_client.execute_query(sql)
             if "error" in result:
diff --git a/tools/pg-assistant/snapshot_compare.py b/tools/pg-assistant/snapshot_compare.py
index e517384..f035cc7 100644
--- a/tools/pg-assistant/snapshot_compare.py
+++ b/tools/pg-assistant/snapshot_compare.py
@@ -814,60 +814,6 @@ def _delta_row(
             "change_pct": f"{direction}{abs(pct):.1f}%",
         }
 
-    # -- LLM comparison analysis ---------------------------------------------
-
-    def _format_comparison_text(
-        self,
-        data_a: dict[str, Any],
-        data_b: dict[str, Any],
-        label_a: str,
-        label_b: str,
-        delta_table: list[dict[str, Any]],
-    ) -> str:
-        parts = [
-            f"SNAPSHOT COMPARISON REPORT\n{'=' * 60}",
-            f"Snapshot A: {label_a}",
-            f"Snapshot B: {label_b}\n",
-            "--- DELTA SUMMARY ---",
-        ]
-        for row in delta_table:
-            parts.append(
-                f"  {row['metric']}: {row[label_a]} -> {row[label_b]} "
-                f"(delta={row['delta']}, {row['change_pct']})"
-            )
-
-        parts.append("\n--- SNAPSHOT A: TOP SQL ---")
-        for i, row in enumerate(data_a.get("top_sql", [])[:10], 1):
-            parts.append(f"  [{i}] {_fmt(row)}")
-
-        parts.append("\n--- SNAPSHOT B: TOP SQL ---")
-        for i, row in enumerate(data_b.get("top_sql", [])[:10], 1):
-            parts.append(f"  [{i}] {_fmt(row)}")
-
-        parts.append("\n--- SNAPSHOT A: WAIT EVENTS ---")
-        for i, row in enumerate(data_a.get("wait_events", [])[:10], 1):
-            parts.append(f"  [{i}] {_fmt(row)}")
-
-        parts.append("\n--- SNAPSHOT B: WAIT EVENTS ---")
-        for i, row in enumerate(data_b.get("wait_events", [])[:10], 1):
-            parts.append(f"  [{i}] {_fmt(row)}")
-
-        return "\n".join(parts)
-
-    def _get_llm_comparison(self, text: str) -> str:
-        # Build programmatic comparison findings first, then ask LLM
-        # for a brief summary only.
-        try:
-            llm_prompt = (
-                text + "\n\n---\n"
-                "Based on the snapshot comparison data above, write 3-5 sentences "
-                "summarising what changed and what the DBA should investigate. "
-                "Do NOT invent any sql_ids, table names, or metrics."
-            )
-            return self.llm.generate(prompt=llm_prompt)
-        except (ConnectionError, RuntimeError) as exc:
-            return f"LLM comparison summary unavailable: {exc}"
-
     def _build_programmatic_comparison(
         self,
         data_a: dict[str, Any],

From f43ffa43dd6d95fbe5e253d9e9e579ae304a4fb6 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 6 Apr 2026 12:26:19 +0000
Subject: [PATCH 19/19] Add structured HTML/CSV parsers for uploaded report
 analysis (pgProfile, AWR, pg_stat_statements)

---
 tools/pg-assistant/auto_analyse.py | 790 ++++++++++++++++++++++++++++-
 1 file changed, 782 insertions(+), 8 deletions(-)

diff --git a/tools/pg-assistant/auto_analyse.py b/tools/pg-assistant/auto_analyse.py
index 004b351..7b09b3a 100644
--- a/tools/pg-assistant/auto_analyse.py
+++ b/tools/pg-assistant/auto_analyse.py
@@ -10,6 +10,7 @@
 import io
 import logging
 import re
+from html.parser import HTMLParser
 from typing import Any
 
 from db_client import BaseDBClient, DB_TYPE_ORACLE, DB_TYPE_POSTGRESQL
@@ -2178,9 +2179,8 @@ def analyse_awr_snaps(self, begin_snap: int, end_snap: int) -> dict[str, Any]:
     def analyse_uploaded_report(
         self, file_content: str, file_name: str
     ) -> dict[str, Any]:
-        """Parse an uploaded report file and display it."""
-        parsed = parse_uploaded_report(file_content, file_name)
-        return self._run_uploaded_report_analysis(parsed)
+        """Parse an uploaded report file and run programmatic analysis."""
+        return self._run_uploaded_report_analysis(file_content, file_name)
 
     def list_awr_snapshots(self) -> list[dict[str, Any]]:
         """Return available AWR snapshots from DBA_HIST_SNAPSHOT."""
@@ -2230,18 +2230,31 @@ def _run_llm_analysis(self, raw_data: dict[str, Any]) -> dict[str, Any]:
             "analysis": findings_report,
         }
 
-    def _run_uploaded_report_analysis(self, report_text: str) -> dict[str, Any]:
-        # For uploaded reports we cannot do structured analysis.
-        # Display the parsed text as-is — no LLM involved.
+    def _run_uploaded_report_analysis(
+        self, file_content: str, file_name: str
+    ) -> dict[str, Any]:
+        # Try to parse into structured data for programmatic analysis.
+        structured = parse_uploaded_report_structured(file_content, file_name)
+        if structured:
+            findings_report = _build_findings_report(structured)
+            report_text = self._format_report(structured)
+            return {
+                "raw_data": structured,
+                "report_text": report_text,
+                "analysis": findings_report,
+            }
+        # Fallback: display parsed text as-is
+        parsed_text = parse_uploaded_report(file_content, file_name)
         return {
             "raw_data": {},
-            "report_text": report_text,
+            "report_text": parsed_text,
             "analysis": (
                 "## Uploaded Report\n\n"
+                "Could not extract structured data from this report format. "
                 "The parsed report content is shown below. "
                 "For detailed programmatic analysis, use **Live** mode "
                 "which queries the database directly.\n\n"
-                "---\n\n" + report_text[:8000]
+                "---\n\n" + parsed_text[:8000]
             ),
         }
 
@@ -2576,3 +2589,764 @@ def _parse_text_report(content: str, file_name: str) -> str:
         parts.append(content)
 
     return "\n".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# Structured report parsing — extract data into dict for _build_findings_report
+# ---------------------------------------------------------------------------
+
+
+class _HTMLTableExtractor(HTMLParser):
+    """Collect every HTML table as a list of row dicts (header→value)."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.tables: list[list[dict[str, str]]] = []  # one entry per kept table
+        self._in_table = False  # written on <table>/</table>; never read here
+        self._in_thead = False
+        self._in_row = False
+        self._in_cell = False
+        self._headers: list[str] = []  # reset on <table>; never read here
+        self._current_row: list[str] = []  # cell texts of the row being read
+        self._current_rows: list[list[str]] = []  # data rows of current table
+        self._cell_text = ""
+        self._current_headers: list[str] = []  # lower-cased header cells
+        # Track section headers (h1-h4, caption) preceding each table
+        self._section_headers: list[str] = []
+        self._last_heading = ""  # latest heading text, stripped and lower-cased
+        self._in_heading = False
+        self._heading_text = ""
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        tag = tag.lower()
+        if tag == "table":  # new table: drop headers/rows of the previous one
+            self._in_table = True
+            self._headers = []
+            self._current_rows = []
+            self._current_headers = []
+        elif tag == "thead":
+            self._in_thead = True
+        elif tag == "tr":
+            self._in_row = True
+            self._current_row = []
+        elif tag in ("td", "th"):
+            self._in_cell = True
+            self._cell_text = ""
+        elif tag in ("h1", "h2", "h3", "h4", "caption"):
+            self._in_heading = True
+            self._heading_text = ""
+
+    def handle_endtag(self, tag: str) -> None:
+        tag = tag.lower()
+        if tag == "table":  # keep the table only if it has headers and data rows
+            self._in_table = False
+            if self._current_headers and self._current_rows:
+                rows = []
+                for raw in self._current_rows:
+                    row_dict: dict[str, str] = {}
+                    for i, hdr in enumerate(self._current_headers):
+                        row_dict[hdr] = raw[i] if i < len(raw) else ""  # pad short rows
+                    rows.append(row_dict)
+                self.tables.append(rows)
+                self._section_headers.append(self._last_heading)  # parallel to tables
+        elif tag == "thead":
+            self._in_thead = False
+        elif tag == "tr":  # <thead> row, or first row with cells, sets headers
+            self._in_row = False
+            if self._in_thead or (not self._current_headers and self._current_row):
+                self._current_headers = [c.strip().lower() for c in self._current_row]
+            elif self._current_headers:
+                self._current_rows.append(self._current_row)
+        elif tag in ("td", "th"):
+            self._in_cell = False
+            self._current_row.append(self._cell_text.strip())
+        elif tag in ("h1", "h2", "h3", "h4", "caption"):
+            self._in_heading = False
+            self._last_heading = self._heading_text.strip().lower()
+
+    def handle_data(self, data: str) -> None:
+        if self._in_cell:
+            self._cell_text += data
+        if self._in_heading:
+            self._heading_text += data
+
+
+def _extract_html_tables(
+    html: str,
+) -> list[tuple[str, list[dict[str, str]]]]:
+    """Parse *html* and pair each extracted table with its heading text."""
+    extractor = _HTMLTableExtractor()
+    extractor.feed(html)
+    headings = extractor._section_headers
+    return [
+        (headings[pos] if pos < len(headings) else "", table_rows)
+        for pos, table_rows in enumerate(extractor.tables)
+    ]
+
+
+def _match_heading(heading: str, *keywords: str) -> bool:
+    """Check if heading contains ALL given keywords (case-insensitive)."""
+    h = heading.lower()
+    return all(k.lower() in h for k in keywords)
+
+
+def _parse_pgprofile_structured(html: str) -> dict[str, Any] | None:
+    """Map pgProfile HTML tables to analysis sections; None if nothing matched."""
+    tables = _extract_html_tables(html)
+    if not tables:
+        return None
+
+    sections: dict[str, Any] = {"db_type": DB_TYPE_POSTGRESQL}
+    found_any = False
+
+    for heading, rows in tables:  # headings arrive stripped and lower-cased
+        if not rows:
+            continue
+
+        # --- Top SQL by elapsed time ---
+        if _match_heading(heading, "sql", "elapsed") or _match_heading(
+            heading, "top", "elapsed"
+        ):
+            mapped = []
+            for r in rows[:20]:  # cap each section at 20 rows
+                mapped.append(
+                    {
+                        "queryid": r.get("queryid", r.get("query id", "")),
+                        "query_text": r.get(
+                            "query text",
+                            r.get("query", r.get("sql text", "")),
+                        ),
+                        "total_exec_sec": _safe_float(
+                            r.get(
+                                "total elapsed",
+                                r.get(
+                                    "elapsed",
+                                    r.get("total_time", r.get("total time", 0)),
+                                ),
+                            )
+                        ),
+                        "calls": _safe_int(r.get("calls", r.get("executions", 0))),
+                        "mean_exec_sec": _safe_float(
+                            r.get(
+                                "mean elapsed",
+                                r.get("mean_time", r.get("mean time", 0)),
+                            )
+                        ),
+                        "shared_blks_hit": _safe_int(
+                            r.get(
+                                "shared_blks_hit",
+                                r.get("shared blks hit", 0),
+                            )
+                        ),
+                        "shared_blks_read": _safe_int(
+                            r.get(
+                                "shared_blks_read",
+                                r.get("shared blks read", 0),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["top_queries"] = mapped
+                found_any = True
+
+        # --- Top SQL by executions ---
+        elif _match_heading(heading, "sql", "execution") or _match_heading(
+            heading, "top", "execution"
+        ):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "queryid": r.get("queryid", r.get("query id", "")),
+                        "query_text": r.get(
+                            "query text",
+                            r.get("query", r.get("sql text", "")),
+                        ),
+                        "calls": _safe_int(r.get("calls", r.get("executions", 0))),
+                        "total_exec_sec": _safe_float(
+                            r.get(
+                                "total elapsed",
+                                r.get("total_time", r.get("total time", 0)),
+                            )
+                        ),
+                        "mean_exec_sec": _safe_float(
+                            r.get(
+                                "mean elapsed",
+                                r.get("mean_time", r.get("mean time", 0)),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["high_execution_count"] = mapped
+                found_any = True
+
+        # --- Top SQL by I/O / reads (stored under "top_cpu_queries") ---
+        elif _match_heading(heading, "sql", "read") or _match_heading(
+            heading, "sql", "i/o"
+        ):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "queryid": r.get("queryid", r.get("query id", "")),
+                        "query_text": r.get(
+                            "query text",
+                            r.get("query", r.get("sql text", "")),
+                        ),
+                        "total_exec_sec": _safe_float(
+                            r.get(
+                                "total elapsed",
+                                r.get("total_time", r.get("total time", 0)),
+                            )
+                        ),
+                        "calls": _safe_int(r.get("calls", r.get("executions", 0))),
+                        "shared_blks_read": _safe_int(
+                            r.get(
+                                "reads",
+                                r.get(
+                                    "shared_blks_read",
+                                    r.get("shared blks read", 0),
+                                ),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["top_cpu_queries"] = mapped
+                found_any = True
+
+        # --- Top SQL by planning time ---
+        elif _match_heading(heading, "sql", "plan"):
+            # Map to high_elapsed_per_exec (setdefault: first matching table wins)
+            mapped = []
+            for r in rows[:20]:
+                avg = _safe_float(
+                    r.get(
+                        "mean plan",
+                        r.get("mean_plan_time", r.get("mean plan time", 0)),
+                    )
+                )
+                if avg > 0.001:
+                    mapped.append(
+                        {
+                            "queryid": r.get("queryid", r.get("query id", "")),
+                            "query_text": r.get(
+                                "query text",
+                                r.get("query", r.get("sql text", "")),
+                            ),
+                            "avg_elapsed_sec": avg,
+                            "total_exec_sec": _safe_float(
+                                r.get(
+                                    "total plan",
+                                    r.get(
+                                        "total_plan_time",
+                                        r.get("total plan time", 0),
+                                    ),
+                                )
+                            ),
+                            "calls": _safe_int(r.get("calls", r.get("executions", 0))),
+                        }
+                    )
+            if mapped:
+                sections.setdefault("high_elapsed_per_exec", mapped)
+                found_any = True
+
+        # --- Top SQL by temp usage ---
+        elif _match_heading(heading, "sql", "temp") or _match_heading(
+            heading, "temp", "file"
+        ):
+            mapped = []
+            for r in rows[:20]:
+                temp = _safe_float(
+                    r.get(
+                        "temp",
+                        r.get("temp_blks_written", r.get("temp blks written", 0)),
+                    )
+                )
+                if temp > 0:
+                    mapped.append(
+                        {
+                            "queryid": r.get("queryid", r.get("query id", "")),
+                            "query_text": r.get(
+                                "query text",
+                                r.get("query", r.get("sql text", "")),
+                            ),
+                            "temp_mb": temp,  # NOTE(review): no unit conversion — confirm MB
+                            "calls": _safe_int(r.get("calls", r.get("executions", 0))),
+                            "total_exec_sec": _safe_float(
+                                r.get(
+                                    "total elapsed",
+                                    r.get(
+                                        "total_time",
+                                        r.get("total time", 0),
+                                    ),
+                                )
+                            ),
+                        }
+                    )
+            if mapped:
+                sections["temp_file_usage"] = mapped
+                found_any = True
+
+        # --- Top tables by sequential scans ---
+        elif _match_heading(heading, "table", "seq") or _match_heading(
+            heading, "sequential scan"
+        ):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "schemaname": r.get("schema", r.get("schemaname", "public")),
+                        "relname": r.get(
+                            "table",
+                            r.get("relname", r.get("relation", "")),
+                        ),
+                        "seq_scan": _safe_int(r.get("seq scan", r.get("seq_scan", 0))),
+                        "idx_scan": _safe_int(r.get("idx scan", r.get("idx_scan", 0))),
+                        "n_live_tup": _safe_int(
+                            r.get(
+                                "live",
+                                r.get(
+                                    "n_live_tup",
+                                    r.get("live tuples", 0),
+                                ),
+                            )
+                        ),
+                        "table_size_mb": _safe_float(
+                            r.get("size", r.get("table size", 0))
+                        ),
+                    }
+                )
+            if mapped:
+                sections["seq_scan_tables"] = mapped
+                found_any = True
+
+        # --- Top tables by DML / inserts+updates+deletes ("table_sizes") ---
+        elif _match_heading(heading, "table", "dml") or _match_heading(
+            heading, "table", "insert"
+        ):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "schemaname": r.get("schema", r.get("schemaname", "public")),
+                        "relname": r.get(
+                            "table",
+                            r.get("relname", r.get("relation", "")),
+                        ),
+                        "total_size_mb": _safe_float(
+                            r.get(
+                                "size",
+                                r.get("table size", r.get("total_size_mb", 0)),
+                            )
+                        ),
+                        "table_size_mb": _safe_float(
+                            r.get(
+                                "table size",
+                                r.get("table_size_mb", 0),
+                            )
+                        ),
+                        "n_live_tup": _safe_int(
+                            r.get(
+                                "live",
+                                r.get("n_live_tup", r.get("live tuples", 0)),
+                            )
+                        ),
+                        "n_tup_ins": _safe_int(
+                            r.get("ins", r.get("n_tup_ins", r.get("inserts", 0)))
+                        ),
+                        "n_tup_upd": _safe_int(
+                            r.get("upd", r.get("n_tup_upd", r.get("updates", 0)))
+                        ),
+                        "n_tup_del": _safe_int(
+                            r.get("del", r.get("n_tup_del", r.get("deletes", 0)))
+                        ),
+                        "n_dead_tup": _safe_int(
+                            r.get(
+                                "dead",
+                                r.get("n_dead_tup", r.get("dead tuples", 0)),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["table_sizes"] = mapped
+                found_any = True
+
+        # --- Wait events ---
+        elif _match_heading(heading, "wait") and not _match_heading(heading, "sql"):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "event": r.get(
+                            "event",
+                            r.get("wait event", r.get("event_name", "")),
+                        ),
+                        "total_waits": _safe_int(
+                            r.get("waits", r.get("total_waits", r.get("count", 0)))
+                        ),
+                        "time_waited_sec": _safe_float(
+                            r.get(
+                                "waited",
+                                r.get(
+                                    "time_waited",
+                                    r.get("time waited", 0),
+                                ),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["wait_events"] = mapped
+                found_any = True
+
+        # --- Vacuum / dead tuples (stored under "bloat_estimate") ---
+        elif _match_heading(heading, "vacuum") or _match_heading(heading, "dead"):
+            mapped = []
+            for r in rows[:20]:
+                dp = _safe_float(r.get("dead_pct", r.get("dead %", 0)))
+                dead = _safe_int(
+                    r.get(
+                        "dead",
+                        r.get("n_dead_tup", r.get("dead tuples", 0)),
+                    )
+                )
+                if dead > 0 or dp > 0:
+                    mapped.append(
+                        {
+                            "schemaname": r.get(
+                                "schema", r.get("schemaname", "public")
+                            ),
+                            "relname": r.get(
+                                "table",
+                                r.get("relname", r.get("relation", "")),
+                            ),
+                            "dead_pct": dp,
+                            "n_dead_tup": dead,
+                            "table_size_mb": _safe_float(
+                                r.get("size", r.get("table size", 0))
+                            ),
+                            "last_autovacuum": r.get(
+                                "last autovacuum",
+                                r.get("last_autovacuum", ""),
+                            ),
+                        }
+                    )
+            if mapped:
+                sections["bloat_estimate"] = mapped
+                found_any = True
+
+        # --- Database statistics ---
+        elif _match_heading(heading, "database", "stat"):
+            if rows:  # always true: empty tables were skipped at loop top
+                r = rows[0]  # only the first row is read
+                sections["database_stats"] = [
+                    {
+                        "cache_hit_pct": _safe_float(
+                            r.get(
+                                "hit ratio",
+                                r.get("cache_hit_pct", r.get("blks_hit_%", 100)),
+                            )
+                        ),
+                        "xact_commit": _safe_int(
+                            r.get(
+                                "commits",
+                                r.get("xact_commit", r.get("xact commit", 0)),
+                            )
+                        ),
+                        "xact_rollback": _safe_int(
+                            r.get(
+                                "rollbacks",
+                                r.get(
+                                    "xact_rollback",
+                                    r.get("xact rollback", 0),
+                                ),
+                            )
+                        ),
+                        "numbackends": _safe_int(
+                            r.get(
+                                "backends",
+                                r.get("numbackends", r.get("connections", 0)),
+                            )
+                        ),
+                        "temp_bytes": _safe_int(
+                            r.get("temp_bytes", r.get("temp bytes", 0))
+                        ),
+                        "temp_files": _safe_int(
+                            r.get("temp_files", r.get("temp files", 0))
+                        ),
+                    }
+                ]
+                found_any = True
+
+    if not found_any:
+        return None
+    return sections
+
+
+def _parse_csv_structured(content: str) -> dict[str, Any] | None:
+    """Parse a pg_stat_statements CSV export into a structured dict.
+
+    Returns None when the CSV has no data rows. Timing columns are taken to
+    be milliseconds (the pg_stat_statements unit) and are always converted
+    to seconds.
+    """
+    reader = csv.DictReader(io.StringIO(content))
+    # Normalise headers to lowercase. DictReader stores surplus cells of an
+    # over-long row under the key None; skip it rather than crash on .lower().
+    rows: list[dict[str, str]] = [
+        {k.lower().strip(): v for k, v in row.items() if k is not None}
+        for row in reader
+    ]
+    if not rows:
+        return None
+
+    sections: dict[str, Any] = {"db_type": DB_TYPE_POSTGRESQL}
+
+    # Map CSV columns to expected structure
+    top_queries: list[dict[str, Any]] = []
+    high_exec: list[dict[str, Any]] = []
+    high_elapsed: list[dict[str, Any]] = []
+    temp_usage: list[dict[str, Any]] = []
+
+    for r in rows:
+        qid = r.get("queryid", r.get("query_id", ""))
+        query_text = r.get("query", r.get("query_text", ""))
+        calls = _safe_int(r.get("calls", r.get("executions", 0)))
+        total_time = _safe_float(
+            r.get(
+                "total_exec_time",
+                r.get("total_time", r.get("total_elapsed", 0)),
+            )
+        )
+        # pg_stat_statements reports time in ms; always convert to sec. A
+        # "> 1000" guess would report e.g. 500 ms as 500 s, break the sort
+        # order and flag almost every query as slow.
+        total_time_sec = total_time / 1000
+        mean_time = _safe_float(
+            r.get(
+                "mean_exec_time",
+                r.get("mean_time", r.get("mean_elapsed", 0)),
+            )
+        )
+        mean_time_sec = mean_time / 1000
+        blks_hit = _safe_int(r.get("shared_blks_hit", 0))
+        blks_read = _safe_int(r.get("shared_blks_read", 0))
+        temp_blks = _safe_int(r.get("temp_blks_written", r.get("temp_blks_read", 0)))
+
+        entry = {
+            "queryid": qid,
+            "query_text": query_text,
+            "total_exec_sec": total_time_sec,
+            "calls": calls,
+            "mean_exec_sec": mean_time_sec,
+            "shared_blks_hit": blks_hit,
+            "shared_blks_read": blks_read,
+        }
+        top_queries.append(entry)
+
+        if calls > 1000:
+            high_exec.append(entry)
+        if mean_time_sec > 1:
+            high_elapsed.append({**entry, "avg_elapsed_sec": mean_time_sec})
+        if temp_blks > 0:
+            temp_usage.append(
+                {
+                    **entry,
+                    "temp_mb": temp_blks * 8 / 1024,  # 8KB blocks to MB
+                }
+            )
+
+    if not top_queries:
+        return None
+
+    # Sort by total elapsed desc
+    top_queries.sort(key=lambda x: x["total_exec_sec"], reverse=True)
+    high_exec.sort(key=lambda x: x["calls"], reverse=True)
+    high_elapsed.sort(key=lambda x: x["avg_elapsed_sec"], reverse=True)
+    temp_usage.sort(key=lambda x: x["temp_mb"], reverse=True)
+
+    sections["top_queries"] = top_queries[:20]
+    if high_exec:
+        sections["high_execution_count"] = high_exec[:20]
+    if high_elapsed:
+        sections["high_elapsed_per_exec"] = high_elapsed[:20]
+    if temp_usage:
+        sections["temp_file_usage"] = temp_usage[:20]
+
+    return sections
+
+
+def _parse_awr_html_structured(html: str) -> dict[str, Any] | None:
+    """Map AWR HTML tables to Oracle analysis sections; None if nothing matched."""
+    tables = _extract_html_tables(html)
+    if not tables:
+        return None
+
+    sections: dict[str, Any] = {"db_type": DB_TYPE_ORACLE}
+    found_any = False
+
+    for heading, rows in tables:  # headings arrive stripped and lower-cased
+        if not rows:
+            continue
+
+        # --- Top SQL by elapsed time ---
+        if _match_heading(heading, "sql", "elapsed"):
+            mapped = []
+            for r in rows[:20]:  # cap SQL/wait sections at 20 rows
+                mapped.append(
+                    {
+                        "sql_id": r.get("sql id", r.get("sql_id", "")),
+                        "sql_text": r.get(
+                            "sql text",
+                            r.get("sql_text", r.get("sql module", "")),
+                        ),
+                        "elapsed_sec": _safe_float(
+                            r.get(
+                                "elapsed time (s)",
+                                r.get("elapsed", r.get("elapsed_sec", 0)),
+                            )
+                        ),
+                        "executions": _safe_int(r.get("executions", r.get("execs", 0))),
+                        "buffer_gets": _safe_int(
+                            r.get(
+                                "buffer gets",
+                                r.get("buffer_gets", r.get("gets", 0)),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["top_elapsed_sql"] = mapped
+                found_any = True
+
+        # --- Top SQL by CPU ---
+        elif _match_heading(heading, "sql", "cpu"):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "sql_id": r.get("sql id", r.get("sql_id", "")),
+                        "sql_text": r.get(
+                            "sql text",
+                            r.get("sql_text", r.get("sql module", "")),
+                        ),
+                        "cpu_sec": _safe_float(
+                            r.get(
+                                "cpu time (s)",
+                                r.get("cpu", r.get("cpu_sec", 0)),
+                            )
+                        ),
+                        "executions": _safe_int(r.get("executions", r.get("execs", 0))),
+                        "buffer_gets": _safe_int(
+                            r.get(
+                                "buffer gets",
+                                r.get("buffer_gets", r.get("gets", 0)),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["top_cpu_sql"] = mapped
+                found_any = True
+
+        # --- Wait events (heading must contain both "wait" and "event") ---
+        elif _match_heading(heading, "wait") and _match_heading(heading, "event"):
+            mapped = []
+            for r in rows[:20]:
+                mapped.append(
+                    {
+                        "event": r.get(
+                            "event",
+                            r.get("event name", r.get("wait event", "")),
+                        ),
+                        "total_waits": _safe_int(
+                            r.get("waits", r.get("total waits", 0))
+                        ),
+                        "time_waited_sec": _safe_float(
+                            r.get(
+                                "time (s)",
+                                r.get(
+                                    "total wait time (s)",
+                                    r.get("time waited", 0),
+                                ),
+                            )
+                        ),
+                    }
+                )
+            if mapped:
+                sections["wait_events"] = mapped
+                found_any = True
+
+        # --- System stats / load profile ---
+        elif _match_heading(heading, "system") or _match_heading(
+            heading, "load profile"
+        ):
+            mapped = []
+            for r in rows[:30]:  # up to 30 statistics
+                name = r.get(
+                    "statistic name",
+                    r.get("statistic", r.get("name", "")),
+                )
+                val = r.get("value", r.get("total", r.get("per second", "")))
+                if name:  # NOTE(review): value goes through _safe_int — confirm for rates
+                    mapped.append({"name": name, "value": _safe_int(val)})
+            if mapped:
+                sections["system_stats"] = mapped
+                found_any = True
+
+        # --- SGA ---
+        elif _match_heading(heading, "sga"):
+            mapped = []
+            for r in rows[:10]:  # up to 10 SGA components
+                name = r.get("pool", r.get("name", r.get("component", "")))
+                size = r.get("size", r.get("size (mb)", r.get("bytes", "")))
+                if name:  # NOTE(review): a "bytes" fallback lands in size_mb as-is
+                    mapped.append({"name": name, "size_mb": _safe_float(size)})
+            if mapped:
+                sections["sga_info"] = mapped
+                found_any = True
+
+    if not found_any:
+        return None
+    return sections
+
+
+def parse_uploaded_report_structured(
+    content: str, file_name: str
+) -> dict[str, Any] | None:
+    """Dispatch an uploaded report to the matching structured parser.
+
+    The parser is chosen from the file extension and from marker strings
+    in the first 3000 characters of the content. Returns None when no
+    parser applies or the chosen parser extracts nothing.
+    """
+    name = file_name.lower()
+    if name.endswith(".csv"):
+        return _parse_csv_structured(content)
+
+    head = content[:3000].lower()
+    looks_pgprofile = "pgprofile" in head or "pg_profile" in head
+    looks_awr = "awr" in head or "workload repository" in head
+
+    if name.endswith((".html", ".htm")):
+        if looks_pgprofile:
+            return _parse_pgprofile_structured(content)
+        if looks_awr:
+            return _parse_awr_html_structured(content)
+        # No marker: try pgProfile first (more common), then AWR
+        return _parse_pgprofile_structured(content) or (
+            _parse_awr_html_structured(content)
+        )
+
+    # Other extensions: only parse when a marker is present and the
+    # content actually embeds HTML tables
+    if (looks_pgprofile or looks_awr) and "<table" in content.lower():
+        if looks_pgprofile:
+            return _parse_pgprofile_structured(content)
+        return _parse_awr_html_structured(content)
+
+    return None