diff --git a/.env.example b/.env.example index 7fdd4f9c..bf9ed935 100644 --- a/.env.example +++ b/.env.example @@ -1,37 +1,109 @@ -# Example configuration for text2sql with AI-generated user responses +# QueryWeaver example environment file +# Copy this to `.env` and edit values before running the app or the Docker image: +# +# cp .env.example .env +# # edit .env +# +# Minimal required variables for a basic run (these are left uncommented below): +# - FASTAPI_SECRET_KEY (REQUIRED) +# - FALKORDB_URL (REQUIRED) — preferred single connection string for FalkorDB +# +# Optional (commented) variables are provided as examples; uncomment and set them +# only if you need the functionality (OAuth, AI keys, local host/port overrides, etc.). -## API Keys for LiteLLM (required for AI functionality) -# Azure OpenAI +# ----------------------------- +# Application / Server settings +# ----------------------------- +# REQUIRED: secret used by FastAPI session middleware — change this before running in production +FASTAPI_SECRET_KEY=your_super_secret_key_here + +# Optional: enable debug/reload when running the app directly +# FASTAPI_DEBUG=False + +# Optional: set application environment (development, staging, production) +# Default: development +# APP_ENV=development + +# Optional: allow OAuth over HTTP in development (disable in production) +# OAUTHLIB_INSECURE_TRANSPORT=1 + +# ----------------------------- +# FalkorDB connection (REQUIRED / preferred) +# ----------------------------- +# Preferred: single connection URL. Edit to point at your FalkorDB/Redis instance. 
+# Example: redis://localhost:6379/0 +FALKORDB_URL=redis://localhost:6379/0 # REQUIRED - change to your FalkorDB URL + +# Optional: separate host/port settings for local testing (only used if FALKORDB_URL is not set) +# FALKORDB_HOST=localhost +# FALKORDB_PORT=6379 + +# ----------------------------- +# Optional API / secret tokens +# ----------------------------- +# API token for internal API access (optional) +# SECRET_TOKEN=your_secret_token +# SECRET_TOKEN_ERP=your_erp_token + +# ----------------------------- +# AI / LLM configuration (optional) +# ----------------------------- +# The default is to use Azure OpenAI if all three variables are set. +# If the OPENAI_API_KEY is set, it will use OpenAI directly. + +# Azure OpenAI (example) # AZURE_API_KEY=your_azure_api_key # AZURE_API_BASE=https://your-resource.openai.azure.com/ # AZURE_API_VERSION=2023-05-15 -# OpenAI +# OpenAI (example) # OPENAI_API_KEY=your_openai_api_key -# Google OAuth for authentication -GOOGLE_CLIENT_ID=your_google_client_id -GOOGLE_CLIENT_SECRET=your_google_client_secret - -# GitHub OAuth for authentication -GITHUB_CLIENT_ID=your_github_client_id -GITHUB_CLIENT_SECRET=your_github_client_secret +# Optional: override default model names from api/config.py +# COMPLETION_MODEL=azure/gpt-4.1 +# EMBEDDING_MODEL=azure/text-embedding-ada-002 -# FASTAPI configuration -FASTAPI_SECRET_KEY=your_super_secret_key_here +# ----------------------------- +# OAuth configuration (optional — uncomment to enable login flows) +# ----------------------------- +# Google OAuth +# GOOGLE_CLIENT_ID=your_google_client_id +# GOOGLE_CLIENT_SECRET=your_google_client_secret -# FalkorDB configuration -FALKORDB_HOST=localhost -FALKORDB_PORT=6379 +# GitHub OAuth +# GITHUB_CLIENT_ID=your_github_client_id +# GITHUB_CLIENT_SECRET=your_github_client_secret -# Optional tokens for API access -SECRET_TOKEN=your_secret_token -SECRET_TOKEN_ERP=your_erp_token +# If your OAuth app uses a different base URL than the request base (e.g., 
using 127.0.0.1 vs localhost) +# you can override the base used for building callback URLs. Example: +# OAUTH_BASE_URL=http://localhost:5000 -# AI Model Configuration (adjust as needed) -# These are set in api/config.py but can be overridden via environment variables -# COMPLETION_MODEL=azure/gpt-4.1 # Model used for SQL generation and response formatting -# EMBEDDING_MODEL=azure/text-embedding-ada-002 # Model used for embeddings +# ----------------------------- +# Email Configuration (optional - for sending invitation emails) +# ----------------------------- +# MAIL_SERVER=smtp.mailgun.org +# MAIL_PORT=587 +# MAIL_USE_TLS=True +# MAIL_USERNAME=your_mail_username +# MAIL_PASSWORD=your_mail_password +# MAIL_DEFAULT_SENDER=noreply@yourdomain.com +# EMAIL_AUTH_ENABLED=false -# Google Tag Manager (optional) +# ----------------------------- +# Frontend / analytics (optional) +# ----------------------------- +# Google Tag Manager ID (optional) # GOOGLE_TAG_MANAGER_ID=GTM-XXXXXXX + +# ----------------------------- +# Optional MCP (Model Context Protocol) settings +# ----------------------------- +# Control QueryWeaver's built-in MCP endpoints (default: enabled) +# Set to "true" to disable mounting the MCP HTTP surface without editing code +# DISABLE_MCP=false + +# Notes +# ----------------------------- +# - Keep secrets out of source control. Use your local `.env` (ignored by git) or a secrets manager in production. +# - For Docker runs, pass `--env-file .env` to `docker run` or provide individual `-e` args. +# - See api/config.py for additional runtime configuration defaults and model overrides. 
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 5d9bdd66..32367ffb 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -18,18 +18,27 @@ QueryWeaver is an open-source Text2SQL tool that transforms natural language int ## Essential Build & Validation Commands -**CRITICAL**: Always run these commands in the exact order specified. Many commands will fail if prerequisites are not met. +Follow this order for a reliable local setup; if you customize the steps, ensure each prerequisite (dependencies, `.env`, Playwright) is completed. -### 1. Initial Setup (Required for all operations) +### 1. Initial Setup (recommended for new contributors) ```bash # Install pipenv if not available pip install pipenv -# Install all dependencies (ALWAYS run this first) +# Install dependencies (backend + frontend) and prepare dev tools +# Recommended: use the Make helper which installs Python deps and frontend deps make install -# OR manually: pipenv sync --dev -# Set up environment file (REQUIRED) +# Prepare the full development environment (installs Playwright browsers too) +# This runs `make install` then Playwright install steps. +make setup-dev + +# OR manual steps if you prefer more granular control: +# pipenv sync --dev +# pipenv run playwright install chromium +# pipenv run playwright install-deps + +# Set up environment file cp .env.example .env # Edit .env with required values (see Environment Setup section) ``` @@ -50,7 +59,7 @@ pipenv run playwright install-deps ### 3. 
Testing Commands ```bash # IMPORTANT: Unit tests require FalkorDB running or will fail with connection errors -# Start FalkorDB for testing (requires Docker) +# You can start a local test FalkorDB using the included Make helper make docker-falkordb # Run unit tests only (safer, doesn't require browser) @@ -122,22 +131,28 @@ make clean ## Environment Setup Requirements -**CRITICAL**: Create `.env` file from `.env.example` and configure these essential variables: +Create `.env` file from `.env.example` and configure these essential variables: ```bash # REQUIRED for FastAPI to start FASTAPI_SECRET_KEY=your_super_secret_key_here -FASTAPI_DEBUG=False -# REQUIRED for database connection (most functionality) -FALKORDB_HOST=localhost -FALKORDB_PORT=6379 +# Optional: set application environment (development, staging, production) +# Default: development (affects session cookie security for OAuth) +APP_ENV=development + +# REQUIRED for database connection (preferred) +# Use a single connection string if possible. 
Example: +# FALKORDB_URL=redis://localhost:6379/0 + +# Optional: enable debug/reload when running the app directly +# FASTAPI_DEBUG=False -# REQUIRED for full functionality (OAuth) -GOOGLE_CLIENT_ID=your_google_client_id -GOOGLE_CLIENT_SECRET=your_google_client_secret -GITHUB_CLIENT_ID=your_github_client_id -GITHUB_CLIENT_SECRET=your_github_client_secret +# REQUIRED for full functionality (OAuth, only if you use login flows) +# GOOGLE_CLIENT_ID=your_google_client_id +# GOOGLE_CLIENT_SECRET=your_google_client_secret +# GITHUB_CLIENT_ID=your_github_client_id +# GITHUB_CLIENT_SECRET=your_github_client_secret # OPTIONAL: AI model configuration (defaults in api/config.py) # AZURE_API_KEY=your_azure_api_key @@ -147,6 +162,7 @@ GITHUB_CLIENT_SECRET=your_github_client_secret **For testing in CI/development**, minimal `.env` setup: ```bash FASTAPI_SECRET_KEY=test-secret-key +APP_ENV=development FASTAPI_DEBUG=False FALKORDB_HOST=localhost FALKORDB_PORT=6379 @@ -267,7 +283,7 @@ All workflows follow this pattern: - Python 3.12 setup - pipenv installation - pipenv sync --dev -- .env file creation with test values +- .env file creation with test values (use FALKORDB_URL in CI) - FalkorDB service startup (for tests requiring DB) - Playwright browser installation (for E2E tests) ``` @@ -307,6 +323,45 @@ Before submitting any changes, run these validation steps: - `tests/e2e/README.md`: Comprehensive E2E testing documentation - `setup_e2e_tests.sh`: Automated test environment setup script +### MCP (Model Context Protocol) + +QueryWeaver optionally exposes an MCP HTTP surface (mounted at `/mcp`) to allow external MCP clients to call QueryWeaver's Text2SQL operations. Key points for coding agents and reviewers: + +- Runtime toggle: the built-in MCP endpoints can be disabled with the env var `DISABLE_MCP=true`. Default behavior is enabled. 
+- Client config: consumers typically use an `mcp.json` (or client-specific config) that points to the MCP URL, for example: + +```json +{ + "servers": { + "queryweaver": { + "type": "http", + "url": "http://127.0.0.1:5000/mcp", + "headers": { + "Authorization": "Bearer your_token_here" + } + } + }, + "inputs": [] +} +``` + +- Tools and examples: projects like GitMCP show common client configurations for Cursor, VSCode, and other MCP-capable tools; use those patterns for guidance when writing docs or adding examples in this repo. +- Security: avoid embedding bearer tokens in repo files. Prefer runtime injection via env files or secret managers. If you need to demonstrate a token in tests, use mocked tokens and don't commit them. + +Example: generate `mcp.json` from an environment token (pseudo): + +```bash +export MQW_TOKEN="secret-token" +cat > mcp.json <= '3.9'", - "version": "==1.6.2" + "version": "==1.6.3" }, "backoff": { "hashes": [ @@ -244,7 +243,7 @@ "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b" ], - "markers": "platform_python_implementation != 'PyPy'", + "markers": "python_version >= '3.8'", "version": "==1.17.1" }, "charset-normalizer": { @@ -417,6 +416,46 @@ "markers": "python_version >= '3.8'", "version": "==0.116.1" }, + "fastapi-mcp": { + "hashes": [ + "sha256:d4a3fe7966af24d44e4b412720561c95eb12bed999a4443a88221834b3b15aec", + "sha256:d4ca9410996f4c7b8ea0d7b20fdf79878dc359ebf89cbf3b222e0b675a55097d" + ], + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==0.4.0" + }, + "fastuuid": { + "hashes": [ + "sha256:02acaea2c955bb2035a7d8e7b3fba8bd623b03746ae278e5fa932ef54c702f9f", + "sha256:0302f5acf54dc75de30103025c5a95db06d6c2be36829043a0aa16fc170076bc", + "sha256:07afc8e674e67ac3d35a608c68f6809da5fab470fb4ef4469094fdb32ba36c51", + "sha256:0df2ea4c9db96fd8f4fa38d0e88e309b3e56f8fd03675a2f6958a5b082a0c1e4", + 
"sha256:22a900ef0956aacf862b460e20541fdae2d7c340594fe1bd6fdcb10d5f0791a9", + "sha256:2bced35269315d16fe0c41003f8c9d63f2ee16a59295d90922cad5e6a67d0418", + "sha256:328694a573fe9dce556b0b70c9d03776786801e028d82f0b6d9db1cb0521b4d1", + "sha256:458f1bc3ebbd76fdb89ad83e6b81ccd3b2a99fa6707cd3650b27606745cfb170", + "sha256:4db1bc7b8caa1d7412e1bea29b016d23a8d219131cff825b933eb3428f044dca", + "sha256:7946b4a310cfc2d597dcba658019d72a2851612a2cebb949d809c0e2474cf0a6", + "sha256:7b15c54d300279ab20a9cc0579ada9c9f80d1bc92997fc61fb7bf3103d7cb26b", + "sha256:7cfd2092253d3441f6a8c66feff3c3c009da25a5b3da82bc73737558543632be", + "sha256:7fe2407316a04ee8f06d3dbc7eae396d0a86591d92bafe2ca32fce23b1145786", + "sha256:82106e4b0a24f4f2f73c88f89dadbc1533bb808900740ca5db9bbb17d3b0c824", + "sha256:8790221325b376e1122e95f865753ebf456a9fb8faf0dca4f9bf7a3ff620e413", + "sha256:8fc66b11423e6f3e1937385f655bedd67aebe56a3dcec0cb835351cfe7d358c9", + "sha256:9303617e887429c193d036d47d0b32b774ed3618431123e9106f610d601eb57e", + "sha256:a1b6764dd42bf0c46c858fb5ade7b7a3d93b7a27485a7a5c184909026694cd88", + "sha256:a8f0f83fbba6dc44271a11b22e15838641b8c45612cdf541b4822a5930f6893c", + "sha256:b19361ee649365eefc717ec08005972d3d1eb9ee39908022d98e3bfa9da59e37", + "sha256:b9b31dd488d0778c36f8279b306dc92a42f16904cba54acca71e107d65b60b0c", + "sha256:d0bd4e5b35aad2826403f4411937c89e7c88857b1513fe10f696544c03e9bd8e", + "sha256:e41656457c34b5dcb784729537ea64c7d9bbaf7047b480c6c6a64c53379f455a", + "sha256:e4b12d3e23515e29773fa61644daa660ceb7725e05397a986c2109f512579a48", + "sha256:ed9f449cba8cf16cced252521aee06e633d50ec48c807683f21cc1d89e193eb0" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.0" + }, "filelock": { "hashes": [ "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58", @@ -567,7 +606,7 @@ "sha256:ad1022e9a998e784c97b2173965d07fe33ee26e4594770b7785a8cc8f922cd95", "sha256:c99073ce404462e909f1d5839b2d14a3827b8fe75ed8aed551ba6609c026c803" ], - "markers": "platform_machine == 
'x86_64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'aarch64'", + "markers": "python_version >= '3.8'", "version": "==1.1.9" }, "httpcore": { @@ -586,6 +625,14 @@ "markers": "python_version >= '3.8'", "version": "==0.28.1" }, + "httpx-sse": { + "hashes": [ + "sha256:8f44d34414bc7b21bf3602713005c5df4917884f76072479b21f68befa4ea26e", + "sha256:cba42174344c3a5b06f255ce65b350880f962d99ead85e776f23c6618a377a37" + ], + "markers": "python_version >= '3.9'", + "version": "==0.4.1" + }, "huggingface-hub": { "hashes": [ "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a", @@ -730,12 +777,20 @@ }, "litellm": { "hashes": [ - "sha256:a72c3e05bcb0e50ac1804f0df09d0d7bf5cb41e84351e1609a960033b0ef01c1", - "sha256:d8baf4b9988df599b55cb675808bbe22cedee2f099ba883684fe3f23af8d13a9" + "sha256:938f05075372f26098211ea9b3cb0a6bb7b46111330226b70d42d40bd307812f", + "sha256:d5a3a3efda04999b60ec0d1c29c1eaaa12f89a7b29db4bda691c7fb55b4fa6ad" ], "index": "pypi", "markers": "python_version not in '2.7, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7' and python_version >= '3.8'", - "version": "==1.75.9" + "version": "==1.76.1" + }, + "markdown-it-py": { + "hashes": [ + "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", + "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3" + ], + "markers": "python_version >= '3.10'", + "version": "==4.0.0" }, "markupsafe": { "hashes": [ @@ -804,6 +859,22 @@ "markers": "python_version >= '3.9'", "version": "==3.0.2" }, + "mcp": { + "hashes": [ + "sha256:165306a8fd7991dc80334edd2de07798175a56461043b7ae907b279794a834c5", + "sha256:c314e7c8bd477a23ba3ef472ee5a32880316c42d03e06dcfa31a1cc7a73b65df" + ], + "markers": "python_version >= '3.10'", + "version": "==1.13.1" + }, + "mdurl": { + "hashes": [ + "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", + "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba" + ], + "markers": 
"python_version >= '3.7'", + "version": "==0.1.2" + }, "multidict": { "hashes": [ "sha256:01368e3c94032ba6ca0b78e7ccb099643466cf24f8dc8eefcfdc0571d56e58f9", @@ -1332,6 +1403,22 @@ "markers": "python_version >= '3.9'", "version": "==2.33.2" }, + "pydantic-settings": { + "hashes": [ + "sha256:06f0062169818d0f5524420a360d632d5857b83cffd4d42fe29597807a1614ee", + "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796" + ], + "markers": "python_version >= '3.9'", + "version": "==2.10.1" + }, + "pygments": { + "hashes": [ + "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", + "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" + ], + "markers": "python_version >= '3.8'", + "version": "==2.19.2" + }, "pyjwt": { "hashes": [ "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", @@ -1354,7 +1441,7 @@ "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.9.0.post0" }, "python-dotenv": { @@ -1458,96 +1545,96 @@ }, "regex": { "hashes": [ - "sha256:0200a5150c4cf61e407038f4b4d5cdad13e86345dac29ff9dab3d75d905cf130", - "sha256:02e5860a250cd350c4933cf376c3bc9cb28948e2c96a8bc042aee7b985cfa26f", - "sha256:075641c94126b064c65ab86e7e71fc3d63e7ff1bea1fb794f0773c97cdad3a03", - "sha256:0a5966220b9a1a88691282b7e4350e9599cf65780ca60d914a798cb791aa1177", - "sha256:0b85241d3cfb9f8a13cefdfbd58a2843f208f2ed2c88181bf84e22e0c7fc066d", - "sha256:1a764a83128af9c1a54be81485b34dca488cbcacefe1e1d543ef11fbace191e1", - "sha256:1e4f4f62599b8142362f164ce776f19d79bdd21273e86920a7b604a4275b4f59", - "sha256:20ff8433fa45e131f7316594efe24d4679c5449c0ca69d91c2f9d21846fdf064", - "sha256:24257953d5c1d6d3c129ab03414c07fc1a47833c9165d49b954190b2b7f21a1a", 
- "sha256:2d15a9da5fad793e35fb7be74eec450d968e05d2e294f3e0e77ab03fa7234a83", - "sha256:3157aa512b9e606586900888cd469a444f9b898ecb7f8931996cb715f77477f0", - "sha256:32b9f9bcf0f605eb094b08e8da72e44badabb63dde6b83bd530580b488d1c6da", - "sha256:33be70d75fa05a904ee0dc43b650844e067d14c849df7e82ad673541cd465b5f", - "sha256:35e43ebf5b18cd751ea81455b19acfdec402e82fe0dc6143edfae4c5c4b3909a", - "sha256:37555e4ae0b93358fa7c2d240a4291d4a4227cc7c607d8f85596cdb08ec0a083", - "sha256:3b836eb4a95526b263c2a3359308600bd95ce7848ebd3c29af0c37c4f9627cd3", - "sha256:4494f8fd95a77eb434039ad8460e64d57baa0434f1395b7da44015bef650d0e4", - "sha256:469142fb94a869beb25b5f18ea87646d21def10fbacb0bcb749224f3509476f0", - "sha256:48fb045bbd4aab2418dc1ba2088a5e32de4bfe64e1457b948bb328a8dc2f1c2e", - "sha256:4913f52fbc7a744aaebf53acd8d3dc1b519e46ba481d4d7596de3c862e011ada", - "sha256:4b7dc33b9b48fb37ead12ffc7bdb846ac72f99a80373c4da48f64b373a7abeae", - "sha256:4b8c4d39f451e64809912c82392933d80fe2e4a87eeef8859fcc5380d0173c64", - "sha256:4f42b522259c66e918a0121a12429b2abcf696c6f967fa37bdc7b72e61469f98", - "sha256:4fef81b2f7ea6a2029161ed6dea9ae13834c28eb5a95b8771828194a026621e4", - "sha256:524c868ba527eab4e8744a9287809579f54ae8c62fbf07d62aacd89f6026b282", - "sha256:57d25b6732ea93eeb1d090e8399b6235ca84a651b52d52d272ed37d3d2efa0f1", - "sha256:5d7de1ceed5a5f84f342ba4a9f4ae589524adf9744b2ee61b5da884b5b659834", - "sha256:6164b1d99dee1dfad33f301f174d8139d4368a9fb50bf0a3603b2eaf579963ad", - "sha256:656433e5b7dccc9bc0da6312da8eb897b81f5e560321ec413500e5367fcd5d47", - "sha256:69c593ff5a24c0d5c1112b0df9b09eae42b33c014bdca7022d6523b210b69f72", - "sha256:69ed3bc611540f2ea70a4080f853741ec698be556b1df404599f8724690edbcd", - "sha256:6c053f9647e3421dd2f5dff8172eb7b4eec129df9d1d2f7133a4386319b47435", - "sha256:6cef962d7834437fe8d3da6f9bfc6f93f20f218266dcefec0560ed7765f5fe01", - "sha256:70645cad3407d103d1dbcb4841839d2946f7d36cf38acbd40120fee1682151e5", - 
"sha256:716a47515ba1d03f8e8a61c5013041c8c90f2e21f055203498105d7571b44531", - "sha256:72a26dcc6a59c057b292f39d41465d8233a10fd69121fa24f8f43ec6294e5415", - "sha256:7373afae7cfb716e3b8e15d0184510d518f9d21471f2d62918dbece85f2c588f", - "sha256:739a74970e736df0773788377969c9fea3876c2fc13d0563f98e5503e5185f46", - "sha256:7bf1c5503a9f2cbd2f52d7e260acb3131b07b6273c470abb78568174fe6bde3f", - "sha256:7f7211a746aced993bef487de69307a38c5ddd79257d7be83f7b202cb59ddb50", - "sha256:8283afe7042d8270cecf27cca558873168e771183d4d593e3c5fe5f12402212a", - "sha256:85c3a958ef8b3d5079c763477e1f09e89d13ad22198a37e9d7b26b4b17438b33", - "sha256:89c9504fc96268e8e74b0283e548f53a80c421182a2007e3365805b74ceef936", - "sha256:95b4639c77d414efa93c8de14ce3f7965a94d007e068a94f9d4997bb9bd9c81f", - "sha256:95d538b10eb4621350a54bf14600cc80b514211d91a019dc74b8e23d2159ace5", - "sha256:96bbae4c616726f4661fe7bcad5952e10d25d3c51ddc388189d8864fbc1b3c68", - "sha256:98d0ce170fcde1a03b5df19c5650db22ab58af375aaa6ff07978a85c9f250f0e", - "sha256:9960d162f3fecf6af252534a1ae337e9c2e20d74469fed782903b24e2cc9d3d7", - "sha256:9a9ab52a466a9b4b91564437b36417b76033e8778e5af8f36be835d8cb370d62", - "sha256:9d644de5520441e5f7e2db63aec2748948cc39ed4d7a87fd5db578ea4043d997", - "sha256:9ead9765217afd04a86822dfcd4ed2747dfe426e887da413b15ff0ac2457e21a", - "sha256:9feab78a1ffa4f2b1e27b1bcdaad36f48c2fed4870264ce32f52a393db093c78", - "sha256:a16dd56bbcb7d10e62861c3cd000290ddff28ea142ffb5eb3470f183628011ac", - "sha256:a664291c31cae9c4a30589bd8bc2ebb56ef880c9c6264cb7643633831e606a4d", - "sha256:aaef1f056d96a0a5d53ad47d019d5b4c66fe4be2da87016e0d43b7242599ffc7", - "sha256:baf2fe122a3db1c0b9f161aa44463d8f7e33eeeda47bb0309923deb743a18276", - "sha256:bca11d3c38a47c621769433c47f364b44e8043e0de8e482c5968b20ab90a3986", - "sha256:c1844be23cd40135b3a5a4dd298e1e0c0cb36757364dd6cdc6025770363e06c1", - "sha256:c3c9740a77aeef3f5e3aaab92403946a8d34437db930a0280e7e81ddcada61f5", - "sha256:c436fd1e95c04c19039668cfb548450a37c13f051e8659f40aed426e36b3765f", 
- "sha256:c7f663ccc4093877f55b51477522abd7299a14c5bb7626c5238599db6a0cb95d", - "sha256:c83aec91af9c6fbf7c743274fd952272403ad9a9db05fe9bfc9df8d12b45f176", - "sha256:cbe1698e5b80298dbce8df4d8d1182279fbdaf1044e864cbc9d53c20e4a2be77", - "sha256:cbfaa401d77334613cf434f723c7e8ba585df162be76474bccc53ae4e5520b3a", - "sha256:d03c6f9dcd562c56527c42b8530aad93193e0b3254a588be1f2ed378cdfdea1b", - "sha256:d428fc7731dcbb4e2ffe43aeb8f90775ad155e7db4347a639768bc6cd2df881a", - "sha256:d5273fddf7a3e602695c92716c420c377599ed3c853ea669c1fe26218867002f", - "sha256:d600e58ee6d036081c89696d2bdd55d507498a7180df2e19945c6642fac59588", - "sha256:d72765a4bff8c43711d5b0f5b452991a9947853dfa471972169b3cc0ba1d0751", - "sha256:d856164d25e2b3b07b779bfed813eb4b6b6ce73c2fd818d46f47c1eb5cd79bd6", - "sha256:da304313761b8500b8e175eb2040c4394a875837d5635f6256d6fa0377ad32c8", - "sha256:da7507d083ee33ccea1310447410c27ca11fb9ef18c95899ca57ff60a7e4d8f1", - "sha256:dde35e2afbbe2272f8abee3b9fe6772d9b5a07d82607b5788e8508974059925c", - "sha256:e154a7ee7fa18333ad90b20e16ef84daaeac61877c8ef942ec8dfa50dc38b7a1", - "sha256:e4636a7f3b65a5f340ed9ddf53585c42e3ff37101d383ed321bfe5660481744b", - "sha256:e91eb2c62c39705e17b4d42d4b86c4e86c884c0d15d9c5a47d0835f8387add8e", - "sha256:ea74cf81fe61a7e9d77989050d0089a927ab758c29dac4e8e1b6c06fccf3ebf0", - "sha256:ee38926f31f1aa61b0232a3a11b83461f7807661c062df9eb88769d86e6195c3", - "sha256:efac4db9e044d47fd3b6b0d40b6708f4dfa2d8131a5ac1d604064147c0f552fd", - "sha256:f14b36e6d4d07f1a5060f28ef3b3561c5d95eb0651741474ce4c0a4c56ba8719", - "sha256:f3e5c1e0925e77ec46ddc736b756a6da50d4df4ee3f69536ffb2373460e2dafd", - "sha256:f3f6e8e7af516a7549412ce57613e859c3be27d55341a894aacaa11703a4c31a", - "sha256:f7f3071b5faa605b0ea51ec4bb3ea7257277446b053f4fd3ad02b1dcb4e64353", - "sha256:f978ddfb6216028c8f1d6b0f7ef779949498b64117fc35a939022f67f810bdcb", - "sha256:fa1cdfb8db96ef20137de5587954c812821966c3e8b48ffc871e22d7ec0a4938", - 
"sha256:fb31080f2bd0681484b275461b202b5ad182f52c9ec606052020fe13eb13a72f", - "sha256:fd5edc3f453de727af267c7909d083e19f6426fc9dd149e332b6034f2a5611e6" + "sha256:008947a7fa92f4cb3b28201c9aa7becc0a44c31a7c2fcb934356e1877baccc09", + "sha256:090d20a6f308c1cd3c33824e892666089d9719ff88e139d4b63623e881d3945c", + "sha256:0b4d8a7f75da748a2d0c045600259f1899c9dd8dd9d3da1daa50bf534c3fa5ba", + "sha256:0c42fbffe25ac6291f8dd00176d1916165550aa649d14e9c4668d6a3d6a5c900", + "sha256:0c460628f6098cf8916b2d62fb39a37a39e49cca0279ac301ff9d94f7e75033e", + "sha256:0d4b71791975fc203e0e6c50db974abb23a8df30729c1ac4fd68c9f2bb8c9358", + "sha256:119a0e930916bb26fe028ef5098c6cad66d7a298560cacbc6942e834580dfba5", + "sha256:145fb4ca5a85e26c330b464fc71bbe0e92523ec5d295c6de9a1e31b06ebccf25", + "sha256:156f711019968ffb3512723a38b06d94d379675c296bdb6104d1abb6e57374c6", + "sha256:15869e4f36de7091342e1dae90216aafa3746e3a069f30b34503a36931036f95", + "sha256:16b5ca6570c71b1ee61dd30f24a1944eb82a372364e37f58f9b9731636cc6ba9", + "sha256:1915dfda52bd4d466f3a66b66988db1f647ee1d9c605858640ceeb779cffd908", + "sha256:1dcec2448ed0062f63e82ca02d1d05f74d4127cb6a9d76a73df60e81298d380b", + "sha256:2206d3a30469e8fc8848139884168127f456efbaca8ae14809c26b98d2be15c6", + "sha256:284fcd2dcb613e8b89b22a30cf42998c9a73ee360b8a24db8457d24f5c42282e", + "sha256:2dadb4ecaad42562771697685a381e3f723bd4d522e357c07ae4a541ebf5753c", + "sha256:305577fab545e64fb84d9a24269aa3132dbe05e1d7fa74b3614e93ec598fe6e6", + "sha256:33a26d4b2dc639868d73b9ec4ff8a89eb295797170125e4d4810ad23228f93c8", + "sha256:348cbcdf2d9dd0d09f05a78218776a33779e95aa57d553065a00429a96c553d3", + "sha256:394c492c398a9f9e17545e19f770c58b97e65963eedaa25bb879e80a03e2b327", + "sha256:3f747541fd1ad1dcf859ce221749a5d26d7dbe6d928efdd407c97a2d27c8f434", + "sha256:40eeff06bbcfa69201b60488f3f3aa38ad3c92c7c0ab2cfc7c9599abfdf24262", + "sha256:421b6ccd037ad551e1ef1bc31debc3a914b579c27c0807f35c85f13b0eccbff3", + "sha256:4561aeb36b0bf3bb44826e4b61a80c6ace0d8839bf4914d78f061f9ba61444b4", 
+ "sha256:470138c8882d66493969f45fad2f8e20f35e381b9f96a37f59a5ac786e653cf6", + "sha256:4d6dbdfdb4de3a77d1b2f9ec6bded2e056081407923d69236e13457924cf5fd7", + "sha256:50628bc413193041838001b3926570629369d675b92badd6962c402aa09ed4c4", + "sha256:54018e66344d60b214f4aa151c046e0fa528221656f4f7eba5a787ccc7057312", + "sha256:5421a2d2026e8189500f12375cfd80a9a1914466d446edd28b37eb33c1953b39", + "sha256:590de47e6c390a42e6bfb1bdbe2148456827a6b28464c6e387f51b4bbe1f83e2", + "sha256:5ba4f8b0d5b88c33fe4060e6def58001fd8334b03c7ce2126964fa8851ab5d1b", + "sha256:5cd74545c32e0da0d489c2293101a82f4a1b88050c235e45509e4123017673b2", + "sha256:62141843d1ec079cd66604424af566e542e7e072b2d9e37165d414d2e6e271dd", + "sha256:630d5c7e0a490db2fee3c7b282c8db973abcbb036a6e4e6dc06c4270965852be", + "sha256:731ddb27a0900fa227dfba976b4efccec8c1c6fba147829bb52e71d49e91a5d7", + "sha256:7b4a3dc155984f09a55c64b90923cb136cd0dad21ca0168aba2382d90ea4c546", + "sha256:86e7ee69fdc9daf6aa98693b0db27a76e3d960c80d87c695af262c2608ccfc6a", + "sha256:8dad3ce46390fe3d81ae1c131e29179f010925fa164e15b918fb037effdb7ad9", + "sha256:8decb26f271b989d612c5d99db5f8f741dcd63ece51c59029840070f5f9778bf", + "sha256:8eaf3ea6631f804efcf0f5bd0e4ab62ba984fd9b70e3aef44b05cc6b951cc728", + "sha256:9082c0db8d43c696fac70b5b0592934f21533940f0118239b5c32fa23e51ed1a", + "sha256:90c37a24d9a809ff1898e74f3318a4e21f8bb3db9975a560fa3722e42c370285", + "sha256:93e077d1fbd24033fa427eab43d80ad47e449d25700cda78e8cac821a30090bf", + "sha256:96adc63fd63c05e2feb9c6b8a7212e2b9f52ccb1fa1f18eaed4f9e0ac2cbd186", + "sha256:97b98ea38fc3c1034f3d7bd30288d2c5b3be8cdcd69e2061d1c86cb14644a27b", + "sha256:9b3535b9a69a818735ebac392876dae4b215fe28c13b145353a2dac468ebae16", + "sha256:a367dbb66842a08744f49c64ba1aab23e4cbcc924bae8ef40870f2c51d6cb240", + "sha256:a848368797515bc141d3fad5fd2d81bf9e8a6a22d9ac1a4be4690dd22e997854", + "sha256:aef62e0b08b0e3c2616783a9f75a02f001254695a0a1d28b829dc9fb6a3603e4", + 
"sha256:b394b5157701b22cf63699c792bfeed65fbfeacbd94fea717a9e2036a51148ab", + "sha256:b839268539b44a965f3ed680fda6270337a05bd425cc80542e0c6808efdc9a7e", + "sha256:ba1deae2ceaa0b181ac9fd4cb8f04d6ba1494f3c8d053c8999f7c0dadb93497b", + "sha256:bc94bccb0482a1eceb34961e3c46e25a3746633fa19f93c93a42ff4b231ee6c3", + "sha256:c03308757831a8d89e7c007abb75d1d4c9fbca003b5fb32755d4475914535f08", + "sha256:c15d361fe9800bf38ef69c2e0c4b8b961ae4ce2f076fcf4f28e1fc9ea127f55a", + "sha256:cd7c1821eff911917c476d41030b422791ce282c23ee9e1b8f7681fd0993f1e4", + "sha256:ceeeaab602978c8eac3b25b8707f21a69c0bcd179d9af72519da93ef3966158f", + "sha256:d0ffe4a3257a235f9d39b99c6f1bc53c7a4b11f28565726b1aa00a5787950d60", + "sha256:d1f3498dcc96266b8db76512ffb2432bab2587df5e8ebfdceba5e737378e2bd1", + "sha256:d41726de2040c2a487bbac70fdd6e3ff2f1aa47dc91f0a29f6955a6dfa0f06b6", + "sha256:d41a71342819bdfe87c701f073a14ea4bd3f847333d696c7344e9ff3412b7f70", + "sha256:d7a9bc68610d22735b6ac01a3c3ef5b03d9303a18bd3e2249340213389f273dc", + "sha256:d82fb8a97e5ed8f1d3ed7f8e0e7fe1760faa95846c0d38b314284dfdbe86b229", + "sha256:d8cb77df92d1a204a0c218d93c5fb14945e2a7b40da2d9f15b05c9ddae393b43", + "sha256:d92379e53d782bdb773988687300e3bccb91ad38157b754b04b1857aaeea16a3", + "sha256:d93801012bb23901df403ae0adf528abfd50041c9e1136a303937d45c14466e0", + "sha256:db8b0e05af08ff38d78544950e844b5f159032b66dedda19b3f9b17297248be7", + "sha256:dc12259599d953bc25bc01f19b056b9115a96cd3cfe05f154d4570c9649800b0", + "sha256:dc8c7fc96c9eb18b6690c96ec9c8fb63ea2fa78c6df4258fd76b59d4fbf46645", + "sha256:dd23006c90d9ff0c2e4e5f3eaf8233dcefe45684f2acb330869ec5c2aa02b1fb", + "sha256:dd61f18dc4446bc3a2904559a61f32e98091cef7fb796e06fa35b9bfefe4c0c5", + "sha256:dd7df4ae4ea0efe0d378535e9825bd20e3be8d57eb3d55291d8094d61c9ccd9e", + "sha256:decd84f195c08b3d9d0297a7e310379aae13ca7e166473534508c81b95c74bba", + "sha256:df8deeb34e06c8ba196beabbcf2810d5ecd8cf71cfe69899e93806244610f7ae", + "sha256:e2ef0087ad6949918836f215480a9331f6c59ad54912a9a412f08ab1c9ccbc98", 
+ "sha256:e3948db57ebe3c4bfb7e05765411ce6186820cafa27e5c737d72dbc5249010b3", + "sha256:e785e40f7edfc19ff0b81b27f25eefdb0251cfd2ac4a9fa1eea03f5129e93758", + "sha256:e78ab1b3e68b890d7ebd69218cfbfe4a09dc00b8a47be8648510b81b932d55ff", + "sha256:e8f709146e0f3dafdb4315884de1490ab59f1b93ecf7f9c6c8b0f655f437e593", + "sha256:ea197ac22396faf5e70c87836bb89f94ed5b500e1b407646a4e5f393239611f1", + "sha256:ebaf81f7344dbf1a2b383e35923648de8f78fb262cf04154c82853887ac3e684", + "sha256:eed02e5c39f91268ea4ddf68ee19eed189d57c605530b7d32960f54325c52e7a", + "sha256:f21b416be10a8348a7313ba8c610569a1ab4bf8ec70731750540842a4551cd3d", + "sha256:f89e5beb3012d3c36c526fd4af163ada24011a0b417378f726b17c2fb382a35d", + "sha256:fadf22d84901f1b6cc6b27439d98688a33cefb83e70c885791c2c27524907ed4", + "sha256:fbabdb18fdd1fc4b0740f4e6b3070d7f41f98a88b8c38cf1962b6dcb3e745e56", + "sha256:fd347592a4811ba1d246f99fb53db82a1898a5aebb511281ac0c2d81632e1789" ], "markers": "python_version >= '3.9'", - "version": "==2025.7.34" + "version": "==2025.8.29" }, "requests": { "hashes": [ @@ -1557,6 +1644,14 @@ "markers": "python_version >= '3.9'", "version": "==2.32.5" }, + "rich": { + "hashes": [ + "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f", + "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8" + ], + "markers": "python_full_version >= '3.8.0'", + "version": "==14.1.0" + }, "rpds-py": { "hashes": [ "sha256:008b839781d6c9bf3b6a8984d1d8e56f0ec46dc56df61fd669c49b58ae800400", @@ -1718,12 +1813,20 @@ "markers": "python_version >= '3.9'", "version": "==0.27.1" }, + "shellingham": { + "hashes": [ + "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", + "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de" + ], + "markers": "python_version >= '3.7'", + "version": "==1.5.4" + }, "six": { "hashes": [ "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", 
"sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==1.17.0" }, "sniffio": { @@ -1734,6 +1837,14 @@ "markers": "python_version >= '3.7'", "version": "==1.3.1" }, + "sse-starlette": { + "hashes": [ + "sha256:16b7cbfddbcd4eaca11f7b586f3b8a080f1afe952c15813455b162edea619e5a", + "sha256:ccd60b5765ebb3584d0de2d7a6e4f745672581de4f5005ab31c3a25d10b52b3a" + ], + "markers": "python_version >= '3.9'", + "version": "==3.0.2" + }, "starlette": { "hashes": [ "sha256:6bc94f839cc176c4858894f1f8908f0ab79dfec1a6b8402f6da9be26ebea52e9", @@ -1789,24 +1900,62 @@ }, "tokenizers": { "hashes": [ - "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78", - "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6", - "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133", - "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5", - "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b", - "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9", - "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", - "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff", - "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60", - "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0", - "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2", - "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24", - "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2", - "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732", - "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880" + 
"sha256:1626cb186e143720c62c6c6b5371e62bbc10af60481388c0da89bc903f37ea0c", + "sha256:2e33b98525be8453f355927f3cab312c36cd3e44f4d7e9e97da2fa94d0a49dcb", + "sha256:4136e1558a9ef2e2f1de1555dcd573e1cbc4a320c1a06c4107a3d46dc8ac6e4b", + "sha256:71784b9ab5bf0ff3075bceeb198149d2c5e068549c0d18fe32d06ba0deb63f79", + "sha256:76cf6757c73a10ef10bf06fa937c0ec7393d90432f543f49adc8cab3fb6f26cb", + "sha256:790bad50a1b59d4c21592f9c3cf5e5cf9c3c7ce7e1a23a739f13e01fb1be377a", + "sha256:8337ca75d0731fc4860e6204cc24bb36a67d9736142aa06ed320943b50b1e7ed", + "sha256:a89264e26f63c449d8cded9061adea7b5de53ba2346fc7e87311f7e4117c1cc8", + "sha256:c78174859eeaee96021f248a56c801e36bfb6bd5b067f2e95aa82445ca324f00", + "sha256:cdf5954de3962a5fd9781dc12048d24a1a6f1f5df038c6e95db328cd22964206", + "sha256:da589a61cbfea18ae267723d6b029b84598dc8ca78db9951d8f5beff72d8507c", + "sha256:dbf9d6851bddae3e046fedfb166f47743c1c7bd11c640f0691dd35ef0bcad3be", + "sha256:ea8562fa7498850d02a16178105b58803ea825b50dc9094d60549a7ed63654bb", + "sha256:eaa9620122a3fb99b943f864af95ed14c8dfc0f47afa3b404ac8c16b3f2bb484", + "sha256:ec5b71f668a8076802b0241a42387d48289f25435b86b769ae1837cad4172a17" ], "markers": "python_version >= '3.9'", - "version": "==0.21.4" + "version": "==0.22.0" + }, + "tomli": { + "hashes": [ + "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", + "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", + "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", + "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", + "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", + "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", + "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", + "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", + "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", + 
"sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", + "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", + "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", + "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", + "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", + "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", + "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", + "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", + "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", + "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", + "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", + "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", + "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", + "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", + "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", + "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", + "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", + "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", + "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", + "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", + "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", + "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", + "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.1" }, "tqdm": { "hashes": [ @@ -1817,6 +1966,14 @@ "markers": "python_version >= '3.7'", "version": "==4.67.1" }, + "typer": { + "hashes": [ + 
"sha256:0c600503d472bcf98d29914d4dcd67f80c24cc245395e2e00ba3603c9332e8ba", + "sha256:643919a79182ab7ac7581056d93c6a2b865b026adf2872c4d02c72758e6f095b" + ], + "markers": "python_version >= '3.7'", + "version": "==0.17.3" + }, "typing-extensions": { "hashes": [ "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", @@ -2076,7 +2233,7 @@ "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049" ], - "markers": "python_version >= '3.11'", + "markers": "python_version >= '3.8'", "version": "==0.4.0" }, "greenlet": { diff --git a/README.md b/README.md index 207e3bb8..e5e91072 100644 --- a/README.md +++ b/README.md @@ -1,179 +1,425 @@ [![Try Free](https://img.shields.io/badge/Try%20Free-FalkorDB%20Cloud-FF8101?labelColor=FDE900&link=https://app.falkordb.cloud)](https://app.falkordb.cloud) [![Dockerhub](https://img.shields.io/docker/pulls/falkordb/queryweaver?label=Docker)](https://hub.docker.com/r/falkordb/queryweaver/) [![Discord](https://img.shields.io/discord/1146782921294884966?style=flat-square)](https://discord.com/invite/6M4QwDXn2w) -[![Workflow](https://github.com/FalkorDB/QueryWeaver/actions/workflows/pylint.yml/badge.svg?branch=main)](https://github.com/FalkorDB/QueryWeaver/actions/workflows/pylint.yml) +[![Tests](https://github.com/FalkorDB/QueryWeaver/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/FalkorDB/QueryWeaver/actions/workflows/tests.yml) +[![Swagger UI](https://img.shields.io/badge/API-Swagger-11B48A?logo=swagger&logoColor=white)](https://app.queryweaver.ai/docs) # QueryWeaver -QueryWeaver is an open-source Text2SQL tool that transforms natural language into SQL using graph-powered schema understanding. Ask your database questions in plain English—QueryWeaver handles the weaving. +QueryWeaver is an open-source Text2SQL tool that converts plain-English questions into SQL using graph-powered schema understanding. 
It helps you ask databases natural-language questions and returns SQL and results. -## Setup +![Screenshot](https://github.com/user-attachments/assets/a0be7bbd-0c99-4399-a302-2b9f7b419dd2) -### Prerequisites +TL;DR +- Try quickly with Docker: `docker run -p 5000:5000 -it falkordb/queryweaver` +- Develop locally: see "Development" section below -- Python 3.12+ -- pipenv (for dependency management) -- FalkorDB instance +## Quick start — Docker (recommended for evaluation) + +Run the official image locally (no local Python or Node required): + +```bash +docker run -p 5000:5000 -it falkordb/queryweaver +``` + +Open: http://localhost:5000 + +### Prefer using a .env file (recommended) + +Create a local `.env` by copying `.env.example` and pass it to Docker. This is the simplest way to provide all required configuration: + +```bash +cp .env.example .env +# edit .env to set your values, then: +docker run -p 5000:5000 --env-file .env falkordb/queryweaver +``` + +### Or pass individual environment variables + +If you prefer to pass variables on the command line, use `-e` flags (less convenient for many variables): + +```bash +docker run -p 5000:5000 -it \ + -e APP_ENV=production \ + -e FASTAPI_SECRET_KEY=your_super_secret_key_here \ + -e GOOGLE_CLIENT_ID=your_google_client_id \ + -e GOOGLE_CLIENT_SECRET=your_google_client_secret \ + -e GITHUB_CLIENT_ID=your_github_client_id \ + -e GITHUB_CLIENT_SECRET=your_github_client_secret \ + -e AZURE_API_KEY=your_azure_api_key \ + falkordb/queryweaver +``` + +Note: To use OpenAI directly instead of Azure OpenAI, replace `AZURE_API_KEY` with `OPENAI_API_KEY` in the above command. + +For a full list of configuration options, consult `.env.example`. + +## MCP server: host or connect (optional) + +QueryWeaver includes optional support for the Model Context Protocol (MCP). 
You can either have QueryWeaver expose an MCP-compatible HTTP surface (so other services can call QueryWeaver as an MCP server), or configure QueryWeaver to call an external MCP server for model/context services. + +What QueryWeaver provides +- The app registers MCP operations focused on Text2SQL flows: + - `list_databases` + - `connect_database` + - `database_schema` + - `query_database` + +- To disable the built-in MCP endpoints set `DISABLE_MCP=true` in your `.env` or environment (default: MCP enabled). +- Configuration -- Node.js and npm (required for frontend TypeScript build) +- `DISABLE_MCP` — disable QueryWeaver's built-in MCP HTTP surface. Set to `true` to disable. Default: `false` (MCP enabled). -### Installation +Examples -1. Clone the repository -2. Install dependencies with Pipenv: - ```bash - pipenv sync - ``` +Disable the built-in MCP when running with Docker: -3. Set up environment variables by copying `.env.example` to `.env` and filling in your values: - ```bash - cp .env.example .env - ``` +```bash +docker run -p 5000:5000 -it --env DISABLE_MCP=true falkordb/queryweaver +``` +Calling the built-in MCP endpoints (example) +- The MCP surface is exposed as HTTP endpoints. + + +### Server Configuration + +Below is a minimal example `mcp.json` client configuration that targets a local QueryWeaver instance exposing the MCP HTTP surface at `/mcp`. + +```json +{ + "servers": { + "queryweaver": { + "type": "http", + "url": "http://127.0.0.1:5000/mcp", + "headers": { + "Authorization": "Bearer your_token_here" + } + } + }, + "inputs": [] +} +``` + +## REST API + +### API Documentation -### OAuth Configuration +Swagger UI: https://app.queryweaver.ai/docs -This application supports authentication via Google and GitHub OAuth. You'll need to set up OAuth applications for both providers: +OpenAPI JSON: https://app.queryweaver.ai/openapi.json -#### Google OAuth Setup +### Overview -1. Go to [Google Cloud Console](https://console.developers.google.com/) -2. 
Create a new project or select an existing one -3. Enable the Google+ API -4. Go to "Credentials" and create an OAuth 2.0 Client ID -5. Add your domain to authorized origins (e.g., `http://localhost:5000`) -6. Add the callback URL: `http://localhost:5000/login/google/authorized` -7. Copy the Client ID and Client Secret to your `.env` file +QueryWeaver exposes a small REST API for managing graphs (database schemas) and running Text2SQL queries. All endpoints that modify or access user-scoped data require authentication via a bearer token. In the browser the app uses session cookies and OAuth flows; for CLI and scripts you can use an API token (see `tokens` routes or the web UI to create one). -#### GitHub OAuth Setup +Core endpoints +- GET /graphs — list available graphs for the authenticated user +- GET /graphs/{graph_id}/data — return nodes/links (tables, columns, foreign keys) for the graph +- POST /graphs — upload or create a graph (JSON payload or file upload) +- POST /graphs/{graph_id} — run a Text2SQL chat query against the named graph (streaming response) -1. Go to GitHub Settings → Developer settings → OAuth Apps -2. Click "New OAuth App" -3. Fill in the application details: - - Application name: Your app name - - Homepage URL: `http://localhost:5000` - - Authorization callback URL: `http://localhost:5000/login/github/authorized` -4. Copy the Client ID and Client Secret to your `.env` file +Authentication +- Add an Authorization header: `Authorization: Bearer ` -### Running the Application +Examples + +1) List graphs (GET) + +curl example: ```bash -pipenv run uvicorn api.index:app --host "localhost" --port "5000" +curl -s -H "Authorization: Bearer $TOKEN" \ + https://app.queryweaver.ai/graphs ``` -The application will be available at `http://localhost:5000`. 
+Python example: -## Frontend build +```python +import requests +resp = requests.get('https://app.queryweaver.ai/graphs', headers={'Authorization': f'Bearer {TOKEN}'}) +print(resp.json()) +``` -The project includes a TypeScript frontend located in the `app/` folder. Build the frontend before running the app in production or after modifying frontend source files. +2) Get graph schema (GET) -Install frontend deps and build (recommended): +curl example: ```bash -make install # installs backend and frontend deps -make build-prod # runs the frontend production build (produces app/public/js/app.js) +curl -s -H "Authorization: Bearer $TOKEN" \ + https://app.queryweaver.ai/graphs/my_database/data ``` -Or run directly from the `app/` folder: +Python example: + +```python +resp = requests.get('https://app.queryweaver.ai/graphs/my_database/data', headers={'Authorization': f'Bearer {TOKEN}'}) +print(resp.json()) +``` + +3) Load a graph (POST) — JSON payload + +```bash +curl -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \ + -d '{"database": "my_database", "tables": [...]}' \ + https://app.queryweaver.ai/graphs +``` + +Or upload a file (multipart/form-data): ```bash +curl -H "Authorization: Bearer $TOKEN" -F "file=@schema.json" \ + https://app.queryweaver.ai/graphs +``` + +4) Query a graph (POST) — run a chat-based Text2SQL request + +The `POST /graphs/{graph_id}` endpoint accepts a JSON body with at least a `chat` field (an array of messages). The endpoint streams processing steps and the final SQL back as server-sent-message chunks delimited by a special boundary used by the frontend. For simple scripting you can call it and read the final JSON object from the streamed messages. 
+
+Example payload:
+
+```json
+{
+  "chat": ["How many users signed up last month?"],
+  "result": [],
+  "instructions": "Prefer PostgreSQL compatible SQL"
+}
+```
+
+curl example (simple, collects whole response):
+
+```bash
+curl -s -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \
+  -d '{"chat": ["Count orders last week"]}' \
+  https://app.queryweaver.ai/graphs/my_database
+```
+
+Python example (stream-aware):
+
+```python
+import requests
+import json
+
+url = 'https://app.queryweaver.ai/graphs/my_database'
+headers = {'Authorization': f'Bearer {TOKEN}', 'Content-Type': 'application/json'}
+with requests.post(url, headers=headers, json={"chat": ["Count orders last week"]}, stream=True) as r:
+    # The server yields JSON objects delimited by a message boundary string
+    boundary = '|||FALKORDB_MESSAGE_BOUNDARY|||'
+    buffer = ''
+    for chunk in r.iter_content(decode_unicode=True, chunk_size=1024):
+        buffer += chunk
+        while boundary in buffer:
+            part, buffer = buffer.split(boundary, 1)
+            if not part.strip():
+                continue
+            obj = json.loads(part)
+            print('STREAM:', obj)
+```
+
+Notes & tips
+- Graph IDs are namespaced per-user. When calling the API directly use the plain graph id (the server will namespace by the authenticated user). For uploaded files the `database` field determines the saved graph id.
+- The streaming response includes intermediate reasoning steps, follow-up questions (if the query is ambiguous or off-topic), and the final SQL. The frontend expects the boundary string `|||FALKORDB_MESSAGE_BOUNDARY|||` between messages.
+- For destructive SQL (INSERT/UPDATE/DELETE etc) the service will include a confirmation step in the stream; the frontend handles this flow. If you automate destructive operations, ensure you handle confirmation properly (see the `ConfirmRequest` model in the code).
+
+
+## Development
+
+Follow these steps to run and develop QueryWeaver from source.
+ +### Prerequisites + +- Python 3.12+ +- pipenv +- A FalkorDB instance (local or remote) +- Node.js and npm (for the TypeScript frontend) + +### Install and configure + +Quickstart (recommended for development): + +```bash +# Clone the repo +git clone https://github.com/FalkorDB/QueryWeaver.git +cd QueryWeaver + +# Install dependencies (backend + frontend) and start the dev server +make install +make run-dev +``` + +If you prefer to set up manually or need a custom environment, use Pipenv: + +```bash +# Install Python (backend) and frontend dependencies +pipenv sync --dev + +# Create a local environment file +cp .env.example .env +# Edit .env with your values (set APP_ENV=development for local development) +``` + +### Run the app locally + +```bash +pipenv run uvicorn api.index:app --host 0.0.0.0 --port 5000 --reload +``` + +The server will be available at http://localhost:5000 + +Alternatively, the repository provides Make targets for running the app: + +```bash +make run-dev # development server (reload, debug-friendly) +make run-prod # production mode (ensure frontend build if needed) +``` + +### Frontend build (when needed) + +The frontend is a TypeScript app in `app/`. Build before production runs or after frontend changes: + +```bash +make install # installs backend and frontend deps +make build-prod # builds the frontend into app/public/js/app.js + +# or manually cd app npm ci npm run build ``` -### Running with Docker +### OAuth configuration -You can run QueryWeaver using Docker without installing Python dependencies locally: +QueryWeaver supports Google and GitHub OAuth. Create OAuth credentials for each provider and paste the client IDs/secrets into your `.env` file. 
+ +- Google: set authorized origin and callback `http://localhost:5000/login/google/authorized` +- GitHub: set homepage and callback `http://localhost:5000/login/github/authorized` + +#### Environment-specific OAuth settings + +For production/staging deployments, set `APP_ENV=production` or `APP_ENV=staging` in your environment to enable secure session cookies (HTTPS-only). This prevents OAuth CSRF state mismatch errors. ```bash -docker run -p 5000:5000 -it falkordb/queryweaver +# For production/staging (enables HTTPS-only session cookies) +APP_ENV=production + +# For development (allows HTTP session cookies) +APP_ENV=development ``` -The application will be available at `http://localhost:5000`. +**Important**: If you're getting "mismatching_state: CSRF Warning!" errors on staging/production, ensure `APP_ENV` is set to `production` or `staging` to enable secure session handling. + +### AI/LLM configuration -#### Configuring with Environment Variables +QueryWeaver uses AI models for Text2SQL conversion and supports both Azure OpenAI and OpenAI directly. -You can configure the application by passing environment variables using the `-e` flag. You can copy the variables from `.env.example` and set them as needed: +#### Default: Azure OpenAI + +By default, QueryWeaver is configured to use Azure OpenAI. You need to set all three Azure credentials: ```bash -docker run -p 5000:5000 -it \ - -e FASTAPI_SECRET_KEY=your_super_secret_key_here \ - -e GOOGLE_CLIENT_ID=your_google_client_id \ - -e GOOGLE_CLIENT_SECRET=your_google_client_secret \ - -e GITHUB_CLIENT_ID=your_github_client_id \ - -e GITHUB_CLIENT_SECRET=your_github_client_secret \ - -e AZURE_API_KEY=your_azure_api_key \ - falkordb/queryweaver +AZURE_API_KEY=your_azure_api_key +AZURE_API_BASE=https://your-resource.openai.azure.com/ +AZURE_API_VERSION=2024-12-01-preview ``` -##### Using a .env File +#### Alternative: OpenAI directly -You can also pass a full environment file to Docker using the `--env-file` option. 
This is the easiest way to provide all required configuration at once: +To use OpenAI directly instead of Azure, simply set the `OPENAI_API_KEY` environment variable: ```bash -docker run -p 5000:5000 --env-file .env falkordb/queryweaver +OPENAI_API_KEY=your_openai_api_key ``` -You can use the provided `.env.example` file as a template: +When `OPENAI_API_KEY` is provided, QueryWeaver automatically switches to use OpenAI's models: +- Embedding model: `openai/text-embedding-ada-002` +- Completion model: `openai/gpt-4.1` + +This configuration is handled automatically in `api/config.py` - you only need to provide the appropriate API key. +#### Docker examples with AI configuration + +Using Azure OpenAI: ```bash -cp .env.example .env -# Edit .env with your values, then run: -docker run -p 5000:5000 --env-file .env falkordb/queryweaver +docker run -p 5000:5000 -it \ + -e FASTAPI_SECRET_KEY=your_secret_key \ + -e AZURE_API_KEY=your_azure_api_key \ + -e AZURE_API_BASE=https://your-resource.openai.azure.com/ \ + -e AZURE_API_VERSION=2024-12-01-preview \ + falkordb/queryweaver ``` -For a complete list of available configuration options, see the `.env.example` file in the repository. +Using OpenAI directly: +```bash +docker run -p 5000:5000 -it \ + -e FASTAPI_SECRET_KEY=your_secret_key \ + -e OPENAI_API_KEY=your_openai_api_key \ + falkordb/queryweaver +``` ## Testing -QueryWeaver includes a comprehensive test suite with both unit and End-to-End (E2E) tests. +> Quick note: many tests require FalkorDB to be available. Use the included helper to run a test DB in Docker if needed. 
+ +### Prerequisites + +- Install dev dependencies: `pipenv sync --dev` +- Start FalkorDB (see `make docker-falkordb`) +- Install Playwright browsers: `pipenv run playwright install` + +### Quick commands + +Recommended: prepare the development/test environment using the Make helper (installs dependencies and Playwright browsers): + +```bash +# Prepare development/test environment (installs deps and Playwright browsers) +make setup-dev +``` -### Quick Start +Alternatively, you can run the E2E-specific setup script and then run tests manually: ```bash -# Set up test environment +# Prepare E2E test environment (installs browsers and other setup) ./setup_e2e_tests.sh # Run all tests make test -# Run only unit tests +# Run unit tests only (faster) make test-unit # Run E2E tests (headless) make test-e2e -# Run E2E tests with visible browser +# Run E2E tests with a visible browser for debugging make test-e2e-headed ``` -### Test Types +### Test types -- **Unit Tests**: Test individual components and functions -- **E2E Tests**: Test complete user workflows using Playwright - - Basic functionality (page loading, UI structure) - - Authentication flows (OAuth integration) - - File upload and processing - - Chat interface and query handling - - API endpoint testing +- Unit tests: focus on individual modules and utilities. Run with `make test-unit` or `pipenv run pytest tests/ -k "not e2e"`. +- End-to-end (E2E) tests: run via Playwright and exercise UI flows, OAuth, file uploads, schema processing, chat queries, and API endpoints. Use `make test-e2e`. -See [tests/e2e/README.md](tests/e2e/README.md) for detailed E2E testing documentation. +See `tests/e2e/README.md` for full E2E test instructions. ### CI/CD -Tests run automatically in GitHub Actions: -- Unit tests run on every push/PR -- E2E tests run with FalkorDB service -- Test artifacts and screenshots saved on failure +GitHub Actions run unit and E2E tests on pushes and pull requests. 
Failures capture screenshots and artifacts for debugging. + +## Troubleshooting + +- FalkorDB connection issues: start the DB helper `make docker-falkordb` or check network/host settings. +- Playwright/browser failures: install browsers with `pipenv run playwright install` and ensure system deps are present. +- Missing environment variables: copy `.env.example` and fill required values. +- **OAuth "mismatching_state: CSRF Warning!" errors**: Set `APP_ENV=production` (or `staging`) in your environment for HTTPS deployments, or `APP_ENV=development` for HTTP development environments. This ensures session cookies are configured correctly for your deployment type. -## Introduction +## Project layout (high level) -image +- `api/` – FastAPI backend +- `app/` – TypeScript frontend +- `tests/` – unit and E2E tests -## LICENSE +## License Licensed under the GNU Affero General Public License (AGPL). See [LICENSE](LICENSE.txt). -Copyrights FalkorDB Ltd. 2025 +Copyright FalkorDB Ltd. 2025 diff --git a/TOKEN_MANAGEMENT.md b/TOKEN_MANAGEMENT.md new file mode 100644 index 00000000..81e73fdc --- /dev/null +++ b/TOKEN_MANAGEMENT.md @@ -0,0 +1,224 @@ +# Token Management Feature + +This document describes the API token management feature implemented for QueryWeaver, allowing users to generate and manage API tokens for authentication. + +## Overview + +The token management system allows authenticated users to: + +1. **Generate API tokens** - Create secure tokens for API access +2. **View token list** - See all their tokens (with only last 4 digits visible) +3. **Delete tokens** - Remove tokens they no longer need +4. **Use tokens for API authentication** - Authenticate API calls using Bearer tokens + +## Architecture + +### Backend Components + +#### 1. Token Routes (`api/routes/tokens.py`) +- **POST `/tokens/generate`** - Generate a new token +- **GET `/tokens/list`** - List user's tokens +- **DELETE `/tokens/{token_id}`** - Delete a specific token + +#### 2. 
Authentication Enhancement (`api/auth/user_management.py`) +- Enhanced `token_required` decorator to support both OAuth and API token authentication +- New `validate_api_token_user()` function for token-based authentication + +#### 3. Database Schema +Tokens are stored as nodes in the Organizations graph with the following structure: +```cypher +(:Token { + token_id: "unique_token_identifier", + token_hash: "sha256_hash_of_token", + created_at: timestamp, + last_4_digits: "1234" +})-[:HAS_TOKEN]-(:User) +``` + +### Frontend Components + +#### 1. User Interface (`app/templates/components/`) +- **token_modal.j2** - Token management modal with generation and list views +- **user_profile.j2** - Added "API Tokens" button to user profile dropdown + +#### 2. TypeScript Module (`app/ts/modules/tokens.ts`) +- Token generation and management functions +- Modal handling and user interactions +- API communication for CRUD operations + +#### 3. Styling (`app/public/css/modals.css`) +- Modal styling for token management interface +- Token display and action button styles + +## Security Features + +### 1. Token Generation +- Uses `secrets.token_urlsafe(32)` for cryptographically secure random tokens +- Tokens are 43 characters long (URL-safe base64 encoding) + +### 2. Token Storage +- Only SHA-256 hashes of tokens are stored in the database +- Original tokens are never persisted +- Each token has a unique `token_id` for identification + +### 3. Token Display +- After generation, tokens are shown once in full +- In the token list, only last 4 digits are visible (e.g., "****1234") +- Copy-to-clipboard functionality for newly generated tokens + +### 4. Authentication +- API calls can use `Authorization: Bearer ` header +- Server validates by hashing received token and matching against stored hash +- Falls back to OAuth session authentication if no valid token provided + +## API Usage Examples + +### 1. 
Generate a Token +```bash +# Must be authenticated via OAuth session +curl -X POST http://localhost:5000/tokens/generate \ + -H "Content-Type: application/json" \ + --cookie "session_cookie=..." +``` + +Response: +```json +{ + "token": "6SxwdQ3vZeEE6xCVwTmD3AbKvWZY2eR_quUCP7eewEA", + "token_id": "G13pqOpPohhs2rnou56A2w", + "created_at": 1706096845, + "last_4_digits": "ewEA" +} +``` + +### 2. List Tokens +```bash +curl -X GET http://localhost:5000/tokens/list \ + -H "Authorization: Bearer 6SxwdQ3vZeEE6xCVwTmD3AbKvWZY2eR_quUCP7eewEA" +``` + +Response: +```json +{ + "tokens": [ + { + "token_id": "G13pqOpPohhs2rnou56A2w", + "created_at": 1706096845, + "last_4_digits": "ewEA" + } + ] +} +``` + +### 3. Delete a Token +```bash +curl -X DELETE http://localhost:5000/tokens/G13pqOpPohhs2rnou56A2w \ + -H "Authorization: Bearer 6SxwdQ3vZeEE6xCVwTmD3AbKvWZY2eR_quUCP7eewEA" +``` + +### 4. Use Token for API Access +```bash +# Any protected endpoint can now use token authentication +curl -X GET http://localhost:5000/graphs \ + -H "Authorization: Bearer 6SxwdQ3vZeEE6xCVwTmD3AbKvWZY2eR_quUCP7eewEA" +``` + +## User Interface Flow + +### 1. Accessing Token Management +1. User logs in via OAuth (Google/GitHub) +2. User clicks their profile picture in the top-right corner +3. User clicks "API Tokens" in the dropdown menu +4. Token management modal opens + +### 2. Generating a Token +1. User clicks "Generate New Token" button +2. System creates secure token and stores hash in database +3. Full token is displayed once with copy button +4. Token appears in user's token list (showing only last 4 digits) + +### 3. Managing Tokens +1. User sees list of all their tokens with creation dates +2. Each token shows only last 4 digits for security +3. User can delete tokens using the "Delete" button +4. 
Confirmation modal appears before deletion + +## Database Queries + +### Create Token +```cypher +MATCH (user:User {email: $user_email}) +CREATE (token:Token { + token_id: $token_id, + token_hash: $token_hash, + created_at: $created_at, + last_4_digits: $last_4_digits +}) +CREATE (user)-[:HAS_TOKEN]->(token) +RETURN token +``` + +### List User Tokens +```cypher +MATCH (user:User {email: $user_email})-[:HAS_TOKEN]->(token:Token) +RETURN token.token_id, token.created_at, token.last_4_digits +ORDER BY token.created_at DESC +``` + +### Validate Token +```cypher +MATCH (user:User)-[:HAS_TOKEN]->(token:Token {token_hash: $token_hash}) +RETURN user.email +``` + +### Delete Token +```cypher +MATCH (user:User {email: $user_email})-[r:HAS_TOKEN]->(token:Token {token_id: $token_id}) +DELETE r, token +``` + +## Testing + +The implementation includes comprehensive tests: + +- **Unit tests** for token generation and validation functions +- **API tests** for authentication and authorization +- **Integration tests** for token CRUD operations + +Run tests: +```bash +make test-unit # Run unit tests +pipenv run python -m pytest tests/test_tokens.py -v # Run token-specific tests +``` + +## Implementation Notes + +### 1. Graph Database Integration +- Leverages existing Organizations graph structure +- Tokens connect to User nodes via HAS_TOKEN relationships +- Maintains consistency with existing authentication patterns + +### 2. Backward Compatibility +- OAuth authentication continues to work unchanged +- API token authentication is additive, not replacement +- Existing protected routes automatically support both auth methods + +### 3. Error Handling +- Comprehensive error handling for database operations +- Proper HTTP status codes and error messages +- Graceful fallback between authentication methods + +### 4. 
Performance Considerations +- Efficient graph queries with proper indexing +- Minimal additional overhead for token validation +- Caching considerations for user email lookups + +## Future Enhancements + +Potential improvements for the token system: + +1. **Token Expiration** - Add configurable expiration dates +2. **Token Scopes** - Limit tokens to specific API operations +3. **Usage Analytics** - Track token usage and last access times +4. **Rate Limiting** - Implement per-token rate limiting +5. **Token Naming** - Allow users to name their tokens for easier management \ No newline at end of file diff --git a/api/agents/__init__.py b/api/agents/__init__.py index 31399e96..efd63f4e 100644 --- a/api/agents/__init__.py +++ b/api/agents/__init__.py @@ -3,7 +3,6 @@ from .analysis_agent import AnalysisAgent from .relevancy_agent import RelevancyAgent from .follow_up_agent import FollowUpAgent -from .taxonomy_agent import TaxonomyAgent from .response_formatter_agent import ResponseFormatterAgent from .utils import parse_response @@ -11,7 +10,6 @@ "AnalysisAgent", "RelevancyAgent", "FollowUpAgent", - "TaxonomyAgent", "ResponseFormatterAgent", "parse_response" ] diff --git a/api/agents/analysis_agent.py b/api/agents/analysis_agent.py index cb59c161..661c3025 100644 --- a/api/agents/analysis_agent.py +++ b/api/agents/analysis_agent.py @@ -11,13 +11,13 @@ class AnalysisAgent(BaseAgent): """Agent for analyzing user queries and generating database analysis.""" - def get_analysis( + def get_analysis( # pylint: disable=too-many-arguments, too-many-positional-arguments self, user_query: str, combined_tables: list, db_description: str, - instructions: str = None, - memory_context: str = None, + instructions: str | None = None, + memory_context: str | None = None, ) -> dict: """Get analysis of user query against database schema.""" formatted_schema = self._format_schema(combined_tables) @@ -62,48 +62,103 @@ def _format_schema(self, schema_data: List) -> str: formatted_schema = [] 
for table_info in schema_data: - table_name = table_info[0] - table_description = table_info[1] - foreign_keys = table_info[2] - columns = table_info[3] - - # Format table header - table_str = f"Table: {table_name} - {table_description}\n" - - # Format columns using the updated OrderedDict structure - for column in columns: - col_name = column.get("columnName", "") - col_type = column.get("dataType", None) - col_description = column.get("description", "") - col_key = column.get("keyType", None) - nullable = column.get("nullable", False) - - key_info = ( - ", PRIMARY KEY" - if col_key == "PRI" - else ", FOREIGN KEY" if col_key == "FK" else "" - ) - column_str = (f" - {col_name} ({col_type},{key_info},{col_key}," - f"{nullable}): {col_description}") - table_str += column_str + "\n" - - # Format foreign keys - if isinstance(foreign_keys, dict) and foreign_keys: - table_str += " Foreign Keys:\n" - for fk_name, fk_info in foreign_keys.items(): - column = fk_info.get("column", "") - ref_table = fk_info.get("referenced_table", "") - ref_column = fk_info.get("referenced_column", "") - table_str += ( - f" - {fk_name}: {column} references {ref_table}.{ref_column}\n" - ) - + table_str = self._format_single_table(table_info) formatted_schema.append(table_str) return "\n".join(formatted_schema) - def _build_prompt( - self, user_input: str, formatted_schema: str, db_description: str, instructions, memory_context: str = None + def _format_single_table(self, table_info: List) -> str: + """ + Format a single table's information. 
+ + Args: + table_info: Table information in the structure + [name, description, foreign_keys, columns] + + Returns: + Formatted table string + """ + table_name = table_info[0] + table_description = table_info[1] + foreign_keys = table_info[2] + columns = table_info[3] + + # Format table header + table_str = f"Table: {table_name} - {table_description}\n" + + # Format columns + table_str += self._format_table_columns(columns) + + # Format foreign keys + table_str += self._format_foreign_keys(foreign_keys) + + return table_str + + def _format_table_columns(self, columns: List) -> str: + """ + Format table columns information. + + Args: + columns: List of column dictionaries + + Returns: + Formatted columns string + """ + columns_str = "" + for column in columns: + column_str = self._format_single_column(column) + columns_str += column_str + "\n" + return columns_str + + def _format_single_column(self, column: dict) -> str: + """ + Format a single column's information. + + Args: + column: Column dictionary with metadata + + Returns: + Formatted column string + """ + col_name = column.get("columnName", "") + col_type = column.get("dataType", None) + col_description = column.get("description", "") + col_key = column.get("keyType", None) + nullable = column.get("nullable", False) + + key_info = ( + ", PRIMARY KEY" + if col_key == "PRI" + else ", FOREIGN KEY" if col_key == "FK" else "" + ) + return (f" - {col_name} ({col_type},{key_info},{col_key}," + f"{nullable}): {col_description}") + + def _format_foreign_keys(self, foreign_keys: dict) -> str: + """ + Format foreign keys information. 
+ + Args: + foreign_keys: Dictionary of foreign key information + + Returns: + Formatted foreign keys string + """ + if not isinstance(foreign_keys, dict) or not foreign_keys: + return "" + + fk_str = " Foreign Keys:\n" + for fk_name, fk_info in foreign_keys.items(): + column = fk_info.get("column", "") + ref_table = fk_info.get("referenced_table", "") + ref_column = fk_info.get("referenced_column", "") + fk_str += f" - {fk_name}: {column} references {ref_table}.{ref_column}\n" + + return fk_str + + def _build_prompt( # pylint: disable=too-many-arguments, too-many-positional-arguments + self, user_input: str, formatted_schema: str, + db_description: str, instructions, memory_context: str | None = None ) -> str: """ Build the prompt for Claude to analyze the query. @@ -118,7 +173,7 @@ def _build_prompt( Returns: The formatted prompt for Claude """ - + # Include memory context in the prompt if available memory_section = "" if memory_context and memory_context.strip(): @@ -137,7 +192,7 @@ def _build_prompt( 6. Consider any patterns or preferences the user has shown in past interactions """ - + prompt = f""" You must strictly follow the instructions below. Deviations will result in a penalty to your confidence score. @@ -237,5 +292,5 @@ def _build_prompt( 12. For personal queries, FIRST check memory context for user identification. If user identity is found in memory context (user name, previous personal queries, etc.), the query IS translatable. 13. CRITICAL PERSONALIZATION CHECK: If missing user identification/personalization is a significant or primary component of the query (e.g., "show my orders", "my account balance", "my recent purchases", "how many employees I have", "products I own") AND no user identification is available in memory context or schema, set "is_sql_translatable" to false. 
However, if memory context contains user identification (like user name or previous successful personal queries), then personal queries ARE translatable even if they are the primary component of the query. - Again: OUTPUT ONLY VALID JSON. No explanations outside the JSON block. """ + Again: OUTPUT ONLY VALID JSON. No explanations outside the JSON block. """ # pylint: disable=line-too-long return prompt diff --git a/api/agents/follow_up_agent.py b/api/agents/follow_up_agent.py index e5d596f4..3798f3b5 100644 --- a/api/agents/follow_up_agent.py +++ b/api/agents/follow_up_agent.py @@ -30,14 +30,13 @@ """ -class FollowUpAgent(BaseAgent): +class FollowUpAgent(BaseAgent): # pylint: disable=too-few-public-methods """Agent for generating helpful follow-up questions when queries fail or are off-topic.""" def generate_follow_up_question( - self, + self, user_question: str, - analysis_result: dict, - found_tables: list = None + analysis_result: dict ) -> str: """ Generate helpful follow-up questions based on failed SQL translation. 
@@ -51,13 +50,16 @@ def generate_follow_up_question( Returns: str: Conversational follow-up response """ - + # Extract key information from analysis result - is_translatable = analysis_result.get("is_sql_translatable", False) if analysis_result else False + is_translatable = ( + analysis_result.get("is_sql_translatable", False) + if analysis_result else False + ) missing_info = analysis_result.get("missing_information", []) if analysis_result else [] ambiguities = analysis_result.get("ambiguities", []) if analysis_result else [] - explanation = analysis_result.get("explanation", "No detailed explanation available") if analysis_result else "No analysis result available" - + explanation = (analysis_result.get("explanation", "No detailed explanation available") + if analysis_result else "No analysis result available") # Prepare the prompt prompt = FOLLOW_UP_GENERATION_PROMPT.format( QUESTION=user_question, @@ -66,17 +68,17 @@ def generate_follow_up_question( AMBIGUITIES=ambiguities, EXPLANATION=explanation ) - + try: completion_result = completion( model=Config.COMPLETION_MODEL, messages=[{"role": "user", "content": prompt}], temperature=0.9 ) - + response = completion_result.choices[0].message.content.strip() return response - - except Exception as e: + + except Exception: # pylint: disable=broad-exception-caught # Fallback response if LLM call fails - return "I'm having trouble generating a follow-up question right now. Could you try rephrasing your question or providing more specific details about what you're looking for?" + return "Sorry, I couldn't generate a follow-up. Could you clarify your question a bit?" 
diff --git a/api/agents/relevancy_agent.py b/api/agents/relevancy_agent.py index e063f157..9317ec60 100644 --- a/api/agents/relevancy_agent.py +++ b/api/agents/relevancy_agent.py @@ -70,6 +70,7 @@ class RelevancyAgent(BaseAgent): # pylint: disable=too-few-public-methods """Agent for determining relevancy of queries to database schema.""" + async def get_answer(self, user_question: str, database_desc: dict) -> dict: """Get relevancy assessment for user question against database description.""" self.messages.append( diff --git a/api/agents/taxonomy_agent.py b/api/agents/taxonomy_agent.py deleted file mode 100644 index be527964..00000000 --- a/api/agents/taxonomy_agent.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Taxonomy agent for taxonomy classification of questions and SQL queries.""" - -from litellm import completion -from api.config import Config - - -TAXONOMY_PROMPT = """You are an advanced taxonomy generator. For a pair of question and SQL query \ -provde a single clarification question to the user. -* For any SQL query that contain WHERE clause, provide a clarification question to the user about the \ -generated value. -* Your question can contain more than one clarification related to WHERE clause. -* Please asked only about the clarifications that you need and not extand the answer. -* Please ask in a polite, humen, and concise manner. -* Do not meantion any tables or columns in your ouput!. -* If you dont need any clarification, please answer with "I don't need any clarification." -* The user didnt saw the SQL queryor the tables, so please understand this position and ask the \ -clarification in that way he have the relevent information to answer. -* When you ask the user to confirm a value, please provide the value in your answer. -* Mention only question about values and dont mention the SQL query or the tables in your answer. - -Please create the clarification question step by step. 
- -Question: -{QUESTION} - -SQL: -{SQL} - -For example: -question: "How many diabetic patients are there?" -SQL: "SELECT COUNT(*) FROM patients WHERE disease_code = 'E11'" -Your output: "The diabitic desease code is E11? If not, please provide the correct diabitic desease code. - -The question to the user:" -""" - - -class TaxonomyAgent: - # pylint: disable=too-few-public-methods - """Agent for taxonomy classification of questions and SQL queries.""" - - def __init__(self): - """Initialize the taxonomy agent.""" - - def get_answer(self, question: str, sql: str) -> str: - """Get taxonomy classification for a question and SQL pair.""" - messages = [ - { - "content": TAXONOMY_PROMPT.format(QUESTION=question, SQL=sql), - "role": "user", - } - ] - completion_result = completion( - model=Config.COMPLETION_MODEL, - messages=messages, - temperature=0, - ) - - answer = completion_result.choices[0].message.content - return answer diff --git a/api/agents/utils.py b/api/agents/utils.py index ac6605c0..53e678a0 100644 --- a/api/agents/utils.py +++ b/api/agents/utils.py @@ -4,7 +4,7 @@ from typing import Any, Dict -class BaseAgent: +class BaseAgent: # pylint: disable=too-few-public-methods """Base class for agents.""" def __init__(self, queries_history: list, result_history: list): diff --git a/api/app_factory.py b/api/app_factory.py index f066fafd..8191c903 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -7,6 +7,7 @@ from fastapi import FastAPI, Request, HTTPException from fastapi.responses import RedirectResponse, JSONResponse from fastapi.staticfiles import StaticFiles +from fastapi_mcp import FastApiMCP from starlette.middleware.sessions import SessionMiddleware from starlette.middleware.base import BaseHTTPMiddleware @@ -17,12 +18,13 @@ from api.routes.auth import auth_router, init_auth from api.routes.graphs import graphs_router from api.routes.database import database_router +from api.routes.tokens import tokens_router # Load environment variables from .env 
file load_dotenv() logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") -class SecurityMiddleware(BaseHTTPMiddleware): +class SecurityMiddleware(BaseHTTPMiddleware): # pylint: disable=too-few-public-methods """Middleware for security checks including static file access""" STATIC_PREFIX = '/static/' @@ -51,18 +53,15 @@ def create_app(): "Text2SQL with " "Graph-Powered Schema Understanding" ), - ) - - app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*") + ) + app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*") - # Add session middleware with explicit settings to ensure OAuth state persists app.add_middleware( SessionMiddleware, secret_key=SECRET_KEY, - session_cookie="qw_session", same_site="lax", # allow top-level OAuth GET redirects to send cookies - https_only=False, # allow http on localhost in development + https_only=False, # True for HTTPS environments (staging/prod), False for HTTP dev max_age=60 * 60 * 24 * 14, # 14 days - measured by seconds ) @@ -81,14 +80,33 @@ def create_app(): app.include_router(auth_router) app.include_router(graphs_router, prefix="/graphs") app.include_router(database_router) + app.include_router(tokens_router, prefix="/tokens") + # app.include_router(mcp_router, prefix="/mcp") setup_oauth_handlers(app, app.state.oauth) + # Control MCP endpoints via environment variable DISABLE_MCP + # Default: MCP is enabled unless DISABLE_MCP is set to true + disable_mcp = os.getenv("DISABLE_MCP", "false").lower() in ("1", "true", "yes") + if disable_mcp: + logging.info("MCP endpoints disabled via DISABLE_MCP environment variable") + else: + mcp = FastApiMCP(app, + name="queryweaver", + description="QueryWeaver MCP API, provides Text2SQL capabilities", + include_operations=["list_databases", + "connect_database", + "database_schema", + "query_database"] + ) + + mcp.mount_http() + @app.exception_handler(Exception) - async def handle_oauth_error(request: Request, exc: Exception): + async def 
handle_oauth_error(request: Request, exc: Exception): # pylint: disable=unused-argument """Handle OAuth-related errors gracefully""" # Check if it's an OAuth-related error - # TODO check this scenario + # TODO check this scenario, pylint: disable=fixme if "token" in str(exc).lower() or "oauth" in str(exc).lower(): logging.warning("OAuth error occurred: %s", exc) return RedirectResponse(url="/", status_code=302) diff --git a/api/auth/oauth_handlers.py b/api/auth/oauth_handlers.py index 8e25198a..e8740b2e 100644 --- a/api/auth/oauth_handlers.py +++ b/api/auth/oauth_handlers.py @@ -42,7 +42,7 @@ async def handle_callback(provider: str, user_info: Dict[str, Any], api_token: s ) return True - except Exception as exc: # capture exception for logging + except Exception as exc: # capture exception for logging, pylint: disable=broad-exception-caught logging.error("Error handling %s OAuth callback: %s", provider, exc) return False diff --git a/api/auth/user_management.py b/api/auth/user_management.py index b49106da..27c3349e 100644 --- a/api/auth/user_management.py +++ b/api/auth/user_management.py @@ -2,20 +2,36 @@ import base64 import logging -from math import log import os import secrets from functools import wraps from typing import Tuple, Optional, Dict, Any from fastapi import Request, HTTPException, status +from pydantic import BaseModel from api.extensions import db # Get secret key for sessions SECRET_KEY = os.getenv("FASTAPI_SECRET_KEY") if not SECRET_KEY: SECRET_KEY = secrets.token_hex(32) - logging.warning("FASTAPI_SECRET_KEY not set, using generated key. Set this in production!") + logging.warning( + "FASTAPI_SECRET_KEY not set, using generated key. Set this in production!" + ) + + +class IdentityInfo(BaseModel): + """ + Data model for storing identity information. + + Attributes: + identity (Dict[str, Any]): Details about the identity provider and credentials. + user (Dict[str, Any]): Information about the associated user. 
+ new_identity (bool): Whether this is a newly created identity. + """ + identity: Dict[str, Any] + user: Dict[str, Any] + new_identity: bool async def _get_user_info(api_token: str) -> Optional[Dict[str, Any]]: @@ -31,24 +47,28 @@ async def _get_user_info(api_token: str) -> Optional[Dict[str, Any]]: # Select the Organizations graph organizations_graph = db.select_graph("Organizations") - result = await organizations_graph.query(query, { - "api_token": api_token, - }) + result = await organizations_graph.query( + query, + { + "api_token": api_token, + }, + ) if result.result_set: single_result = result.result_set[0] token_valid = single_result[3] - # TODO delete invalid token from DB if token_valid: return { "email": single_result[0], "name": single_result[1], - "picture": single_result[2] + "picture": single_result[2], } + # Delete invalid/expired token from DB for cleanup + await delete_user_token(api_token) return None - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logging.error("Error fetching user info: %s", e) return None @@ -65,15 +85,25 @@ async def delete_user_token(api_token: str): # Select the Organizations graph organizations_graph = db.select_graph("Organizations") - await organizations_graph.query(query, { - "api_token": api_token, - }) + await organizations_graph.query( + query, + { + "api_token": api_token, + }, + ) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logging.error("Error deleting user token: %s", e) -async def ensure_user_in_organizations(provider_user_id: str, email: str, name: str, provider: str, api_token: str, picture: str = None): +async def ensure_user_in_organizations( # pylint: disable=too-many-arguments, disable=too-many-positional-arguments + provider_user_id: str, + email: str, + name: str, + provider: str, + api_token: str, + picture: str | None = None, +) -> tuple[bool, Optional[IdentityInfo]]: """ Check if identity exists in 
Organizations graph, create if not. Creates separate Identity and User nodes with proper relationships. @@ -81,113 +111,39 @@ async def ensure_user_in_organizations(provider_user_id: str, email: str, name: Returns (is_new_user, user_info) """ # Input validation - if not provider_user_id or not email or not provider: - logging.error("Missing required parameters: provider_user_id=%s, email=%s, provider=%s", - provider_user_id, email, provider) - return False, None - - # Validate email format (basic check) - if "@" not in email or "." not in email: - logging.error("Invalid email format: %s", email) - return False, None - # Validate provider is in allowed list - allowed_providers = ["google", "github"] - if provider not in allowed_providers: - logging.error("Invalid provider: %s", provider) - return False, None + validation_result = _validate_user_input(provider_user_id, email, provider) + if validation_result: + return validation_result try: - # Select the Organizations graph organizations_graph = db.select_graph("Organizations") - - # Extract first and last name - name_parts = (name or "").split(" ", 1) if name else ["", ""] - first_name = name_parts[0] if len(name_parts) > 0 else "" - last_name = name_parts[1] if len(name_parts) > 1 else "" - - # Use MERGE to handle all scenarios in a single atomic operation - merge_query = """ - // First, ensure user exists (merge by email) - MERGE (user:User {email: $email}) - ON CREATE SET - user.first_name = $first_name, - user.last_name = $last_name, - user.created_at = timestamp() - - // Then, merge identity and link to user - MERGE (identity:Identity {provider: $provider, provider_user_id: $provider_user_id}) - ON CREATE SET - identity.email = $email, - identity.name = $name, - identity.picture = $picture, - identity.created_at = timestamp(), - identity.last_login = timestamp() - ON MATCH SET - identity.email = $email, - identity.name = $name, - identity.picture = $picture, - identity.last_login = timestamp() - - // Ensure 
relationship exists - MERGE (identity)-[:AUTHENTICATES]->(user) - - // Then, create a session linked to the Identity and store the API_Token - MERGE (token:Token {id: $api_token}) - ON CREATE SET - token.created_at = timestamp(), - token.expires_at = timestamp() + 86400000 // 24h expiry - MERGE (identity)-[:HAS_TOKEN]->(token) - - // Return results with flags to determine if this was a new user/identity - RETURN - identity, - user, - identity.created_at = identity.last_login AS is_new_identity, - EXISTS((user)<-[:AUTHENTICATES]-(:Identity)) AS had_other_identities - """ - - result = await organizations_graph.query(merge_query, { - "provider": provider, - "provider_user_id": provider_user_id, - "email": email, - "name": name, - "picture": picture, - "first_name": first_name, - "last_name": last_name, - "api_token": api_token - }) - - if result.result_set: - identity = result.result_set[0][0] - user = result.result_set[0][1] - is_new_identity = result.result_set[0][2] - had_other_identities = result.result_set[0][3] - - # Determine the type of operation for logging - if is_new_identity and not had_other_identities: - # Brand new user (first identity) - logging.info("NEW USER CREATED: provider=%s, provider_user_id=%s, " - "email=%s, name=%s", provider, provider_user_id, email, name) - return True, {"identity": identity, "user": user} - elif is_new_identity and had_other_identities: - # New identity for existing user (cross-provider linking) - logging.info("NEW IDENTITY LINKED TO EXISTING USER: provider=%s, " - "provider_user_id=%s, email=%s, name=%s", - provider, provider_user_id, email, name) - return True, {"identity": identity, "user": user} - else: - # Existing identity login - logging.info("Existing identity found: provider=%s, email=%s", provider, email) - return False, {"identity": identity, "user": user} - else: - logging.error("Failed to create/update identity and user: email=%s", email) - return False, None + first_name, last_name = _extract_name_parts(name) 
+ + merge_query = _build_user_merge_query() + query_params = _build_query_params( + provider, + provider_user_id, + email, + name=name, + picture=picture, + first_name=first_name, + last_name=last_name, + api_token=api_token, + ) + + result = await organizations_graph.query(merge_query, query_params) + return _process_user_result(result, provider, provider_user_id, email, name) except (AttributeError, ValueError, KeyError) as e: logging.error("Error managing user in Organizations graph: %s", e) return False, None - except Exception as e: + except (ConnectionError, TimeoutError) as e: + logging.error( + "Database connection error managing user in Organizations graph: %s", e + ) + return False, None + except Exception as e: # pylint: disable=broad-exception-caught logging.error("Unexpected error managing user in Organizations graph: %s", e) return False, None @@ -196,12 +152,15 @@ async def update_identity_last_login(provider, provider_user_id): """Update the last login timestamp for an existing identity""" # Input validation if not provider or not provider_user_id: - logging.error("Missing required parameters: provider=%s, provider_user_id=%s", - provider, provider_user_id) + logging.error( + "Missing required parameters: provider=%s, provider_user_id=%s", + provider, + provider_user_id, + ) return # Validate provider is in allowed list - allowed_providers = ["google", "github"] + allowed_providers = ["google", "github", "email"] if provider not in allowed_providers: logging.error("Invalid provider: %s", provider) return @@ -213,18 +172,58 @@ async def update_identity_last_login(provider, provider_user_id): SET identity.last_login = timestamp() RETURN identity """ - await organizations_graph.query(update_query, { - "provider": provider, - "provider_user_id": provider_user_id - }) - logging.info("Updated last login for identity: provider=%s, provider_user_id=%s", - provider, provider_user_id) + await organizations_graph.query( + update_query, {"provider": provider, 
"provider_user_id": provider_user_id} + ) + logging.info( + "Updated last login for identity: provider=%s, provider_user_id=%s", + provider, + provider_user_id, + ) except (AttributeError, ValueError, KeyError) as e: - logging.error("Error updating last login for identity %s/%s: %s", - provider, provider_user_id, e) - except Exception as e: - logging.error("Unexpected error updating last login for identity %s/%s: %s", - provider, provider_user_id, e) + logging.error( + "Error updating last login for identity %s/%s: %s", + provider, + provider_user_id, + e, + ) + except Exception as e: # pylint: disable=broad-exception-caught + logging.error( + "Unexpected error updating last login for identity %s/%s: %s", + provider, + provider_user_id, + e, + ) + + +def get_token(request: Request) -> Optional[str]: + """ + Extract the API token from the request. + """ + + # Check cookies + api_token = request.cookies.get("api_token") + if api_token: + return api_token + + # Check query parameters + api_token = request.query_params.get("api_token") + if api_token: + return api_token + + # Check Authorization header + auth_header = request.headers.get("authorization") or request.headers.get( + "Authorization" + ) + if auth_header: + try: + parts = auth_header.split(None, 1) + if len(parts) == 2 and parts[0].lower() == "bearer": + return parts[1].strip() + except Exception: # pylint: disable=broad-exception-caught + pass + + return None async def validate_user(request: Request) -> Tuple[Optional[Dict[str, Any]], bool]: @@ -234,10 +233,7 @@ async def validate_user(request: Request) -> Tuple[Optional[Dict[str, Any]], boo Includes refresh handling for Google. 
""" try: - # token might be in the URL if not in the cookie for API access - api_token = request.cookies.get("api_token") - if not api_token: - api_token = request.query_params.get("api_token") + api_token = get_token(request) if api_token: db_info = await _get_user_info(api_token) @@ -247,13 +243,15 @@ async def validate_user(request: Request) -> Tuple[Optional[Dict[str, Any]], boo return None, False - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logging.error("Unexpected error in validate_user: %s", e) return None, False + def token_required(func): """Decorator to protect FastAPI routes with token authentication. Automatically refreshes tokens if expired. + Supports both OAuth and API token authentication. """ @wraps(func) @@ -261,23 +259,22 @@ async def wrapper(request: Request, *args, **kwargs): try: user_info, is_authenticated = await validate_user(request) - if not is_authenticated: - # Second attempt after clearing session to force re-validation - user_info, is_authenticated = await validate_user(request) - if not is_authenticated: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail="Unauthorized - Please log in" + detail="Unauthorized - Please log in or provide a valid API token", ) # Attach user_id to request.state (like FASTAPI's g.user_id) # we're using the email as BASE64 encoded - request.state.user_id = base64.b64encode(user_info.get("email").encode()).decode() + email = user_info.get("email") + request.state.user_id = base64.b64encode(email.encode()).decode() + request.state.user_email = email + if not request.state.user_id: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail="Unauthorized - Invalid user" + detail="Unauthorized - Invalid user", ) return await func(request, *args, **kwargs) @@ -288,7 +285,143 @@ async def wrapper(request: Request, *args, **kwargs): logging.error("Unexpected error in token_required: %s", e) raise HTTPException( 
status_code=status.HTTP_401_UNAUTHORIZED, - detail="Unauthorized - Authentication error" - ) + detail="Unauthorized - Authentication error", + ) from e return wrapper + + +def _validate_user_input(provider_user_id: str, email: str, provider: str): + """Validate input parameters for user creation/update.""" + if not provider_user_id or not email or not provider: + logging.error( + "Missing required parameters: provider_user_id=%s, email=%s, provider=%s", + provider_user_id, + email, + provider, + ) + return False, None + + # Validate email format (basic check) + if "@" not in email or "." not in email: + logging.error("Invalid email format: %s", email) + return False, None + + # Validate provider is in allowed list + allowed_providers = ["google", "github", "api", "email"] + if provider not in allowed_providers: + logging.error("Invalid provider: %s", provider) + return False, None + + return None # No validation errors + + +def _extract_name_parts(name: str) -> tuple: + """Extract first and last name from full name.""" + name_parts = (name or "").split(" ", 1) if name else ["", ""] + first_name = name_parts[0] if len(name_parts) > 0 else "" + last_name = name_parts[1] if len(name_parts) > 1 else "" + return first_name, last_name + + +def _build_user_merge_query() -> str: + """Build the Cypher query for user/identity merge operations.""" + return """ + // First, ensure user exists (merge by email) + MERGE (user:User {email: $email}) + ON CREATE SET + user.first_name = $first_name, + user.last_name = $last_name, + user.created_at = timestamp() + + // Then, merge identity and link to user + MERGE (identity:Identity {provider: $provider, provider_user_id: $provider_user_id}) + ON CREATE SET + identity.email = $email, + identity.name = $name, + identity.picture = $picture, + identity.created_at = timestamp(), + identity.last_login = timestamp() + ON MATCH SET + identity.email = $email, + identity.name = $name, + identity.picture = $picture, + identity.last_login = 
timestamp() + + // Ensure relationship exists + MERGE (identity)-[:AUTHENTICATES]->(user) + + // Then, create a session linked to the Identity and store the API_Token + MERGE (token:Token {id: $api_token}) + ON CREATE SET + token.created_at = timestamp(), + token.expires_at = timestamp() + 86400000 // 24h expiry + MERGE (identity)-[:HAS_TOKEN]->(token) + + // Return results with flags to determine if this was a new user/identity + RETURN + identity, + user, + identity.created_at = identity.last_login AS is_new_identity + """ + + +def _build_query_params( # pylint: disable=too-many-arguments + provider: str, + provider_user_id: str, + email: str, + *, + name: str, + picture: str | None = None, + first_name: str, + last_name: str, + api_token: str +) -> dict: + """Build query parameters for the database operation.""" + return { + "provider": provider, + "provider_user_id": provider_user_id, + "email": email, + "name": name, + "picture": picture, + "first_name": first_name, + "last_name": last_name, + "api_token": api_token, + } + + +def _process_user_result( + result, provider: str, provider_user_id: str, email: str, name: str +): + """Process the database result and return appropriate response.""" + if result.result_set: + identity: dict[str, Any] = result.result_set[0][0] + user: dict[str, Any] = result.result_set[0][1] + is_new_identity: bool = result.result_set[0][2] + + if is_new_identity: + # New identity for existing user (cross-provider linking) + logging.info( + "NEW IDENTITY LINKED TO USER: provider=%s, " + "provider_user_id=%s, email=%s, name=%s", + provider, + provider_user_id, + email, + name, + ) + return True, { + "identity": identity, + "user": user, + "new_identity": is_new_identity, + } + + # Existing identity login + logging.info("Existing identity found: provider=%s, email=%s", provider, email) + return False, { + "identity": identity, + "user": user, + "new_identity": is_new_identity, + } + + logging.error("Failed to create/update identity and 
user: email=%s", email) + return False, None diff --git a/api/config.py b/api/config.py index 6396ba47..b95a792e 100644 --- a/api/config.py +++ b/api/config.py @@ -1,7 +1,9 @@ + """ This module contains the configuration for the text2sql module. """ +import os import dataclasses from typing import Union @@ -48,17 +50,20 @@ class Config: """ Configuration class for the text2sql module. """ - - SCHEMA_PATH = "api/schema_schema.json" - EMBEDDING_MODEL_NAME = "azure/text-embedding-ada-002" - COMPLETION_MODEL = "azure/gpt-4.1" - VALIDATOR_MODEL = "azure/gpt-4.1" - TEMPERATURE = 0 + AZURE_FLAG = True + if not os.getenv("OPENAI_API_KEY"): + EMBEDDING_MODEL_NAME = "azure/text-embedding-ada-002" + COMPLETION_MODEL = "azure/gpt-4.1" + else: + AZURE_FLAG = False + EMBEDDING_MODEL_NAME = "openai/text-embedding-ada-002" + COMPLETION_MODEL = "openai/gpt-4.1" + + DB_MAX_DISTINCT: int = 100 # pylint: disable=invalid-name + DB_UNIQUENESS_THRESHOLD: float = 0.5 # pylint: disable=invalid-name SHORT_MEMORY_LENGTH = 5 # Maximum number of questions to keep in short-term memory - config = {} - - EMBEDDING_MODEL = EmbeddingsModel(model_name=EMBEDDING_MODEL_NAME, config=config) + EMBEDDING_MODEL = EmbeddingsModel(model_name=EMBEDDING_MODEL_NAME) FIND_SYSTEM_PROMPT = """ You are an expert in analyzing natural language queries into SQL tables descriptions. 
diff --git a/api/constants.py b/api/constants.py deleted file mode 100644 index 93532b47..00000000 --- a/api/constants.py +++ /dev/null @@ -1,174 +0,0 @@ -"""Constants and benchmark data for the text2sql application.""" - -EXAMPLES = { - "crm_usecase": [ - ("Which companies have generated the most revenue through closed deals, " - "and how much revenue did they generate?"), - "How many leads converted into deals over the last month", - ("Which companies have open sales opportunities and active SLA agreements " - "in place?"), - ("Which high-value sales opportunities (value > $50,000) have upcoming meetings " - "scheduled, and what companies are they associated with?"), - ], - "ERP_system": [ - # ("What is the total value of all purchase orders created in the last " - # "quarter?"), - # ("Which suppliers have the highest number of active purchase orders, " - # "and what is the total value of those orders?"), - "What is the total order value for customer Almo Office?", - "Show the total amount of all orders placed on 11/24", - "What's the profit for order SO2400002?", - "List all confirmed orders form today with their final prices", - "How many items are in order SO2400002?", - # Product-Specific Questions - "What is the price of Office Chair (part 0001100)?", - "List all items with quantity greater than 3 units", - "Show me all products with price above $20", - "What's the total cost of all A4 Paper items ordered?", - "Which items have the highest profit margin?", - # Financial Analysis Questions - "Calculate the total profit for this year", - "Show me orders with overall discount greater than 5%", - "What's the average profit percentage across all items?", - "List orders with final price exceeding $700", - "Show me items with profit margin above 50%", - # Customer-Related Questions - "How many orders has customer 100038 placed?", - "What's the total purchase amount by Almo Office?", - "List all orders with their customer names and contact details", - "Show me 
customers with orders above $500", - "What's the average order value per customer?", - # Inventory/Stock Questions - "Which items have zero quantity?", - "Show me all items with their crate types", - "List products with their packaging details", - "What's the total quantity ordered for each product?", - "Show me items with pending shipments", - ], -} - - -BENCHMARK = [ - { - "question": ("List all contacts who are associated with companies that have at " - "least one active deal in the pipeline, and include the deal stage."), - "sql": ("SELECT DISTINCT c.contact_id, c.first_name, c.last_name, d.deal_id, " - "d.deal_name, ds.stage_name FROM contacts AS c " - "JOIN company_contacts AS cc ON c.contact_id = cc.contact_id " - "JOIN companies AS co ON cc.company_id = co.company_id " - "JOIN deals AS d ON co.company_id = d.company_id " - "JOIN deal_stages AS ds ON d.stage_id = ds.stage_id " - "WHERE ds.is_active = 1;"), - }, - { - "question": ("Which sales representatives (users) have closed deals worth more " - "than $100,000 in the past year, and what was the total value of " - "deals they closed?"), - "sql": ("SELECT u.user_id, u.first_name, u.last_name, SUM(d.amount) AS " - "total_closed_value FROM users AS u " - "JOIN deals AS d ON u.user_id = d.owner_id " - "JOIN deal_stages AS ds ON d.stage_id = ds.stage_id " - "WHERE ds.stage_name = 'Closed Won' AND d.close_date >= " - "DATE_SUB(CURDATE(), INTERVAL 1 YEAR) GROUP BY u.user_id " - "HAVING total_closed_value > 100000;"), - }, - { - "question": ("Find all contacts who attended at least one event and were later " - "converted into leads that became opportunities within three months " - "of the event."), - "sql": ("SELECT DISTINCT c.contact_id, c.first_name, c.last_name " - "FROM contacts AS c " - "JOIN event_attendees AS ea ON c.contact_id = ea.contact_id " - "JOIN events AS e ON ea.event_id = e.event_id " - "JOIN leads AS l ON c.contact_id = l.contact_id " - "JOIN opportunities AS o ON l.lead_id = o.lead_id " - "WHERE 
o.created_date BETWEEN e.event_date AND " - "DATE_ADD(e.event_date, INTERVAL 3 MONTH);"), - }, - { - "question": ("Which customers have the highest lifetime value based on their " - "total invoice payments, including refunds and discounts?"), - "sql": ("SELECT c.contact_id, c.first_name, c.last_name, " - "SUM(i.total_amount - COALESCE(r.refund_amount, 0) - " - "COALESCE(d.discount_amount, 0)) AS lifetime_value " - "FROM contacts AS c " - "JOIN orders AS o ON c.contact_id = o.contact_id " - "JOIN invoices AS i ON o.order_id = i.order_id " - "LEFT JOIN refunds AS r ON i.invoice_id = r.invoice_id " - "LEFT JOIN discounts AS d ON i.invoice_id = d.invoice_id " - "GROUP BY c.contact_id ORDER BY lifetime_value DESC LIMIT 10;"), - }, - { - "question": ("Show all deals that have involved at least one email exchange, " - "one meeting, and one phone call with a contact in the past six months."), - "sql": ("SELECT DISTINCT d.deal_id, d.deal_name FROM deals AS d " - "JOIN contacts AS c ON d.contact_id = c.contact_id " - "JOIN emails AS e ON c.contact_id = e.contact_id " - "JOIN meetings AS m ON c.contact_id = m.contact_id " - "JOIN phone_calls AS p ON c.contact_id = p.contact_id " - "WHERE e.sent_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) " - "AND m.meeting_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) " - "AND p.call_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH);"), - }, - { - "question": ("Which companies have the highest number of active support tickets, " - "and how does their number of tickets correlate with their total deal value?"), - "sql": ("SELECT co.company_id, co.company_name, COUNT(st.ticket_id) AS active_tickets, " - "SUM(d.amount) AS total_deal_value FROM companies AS co " - "LEFT JOIN support_tickets AS st ON co.company_id = st.company_id " - "AND st.status = 'Open' " - "LEFT JOIN deals AS d ON co.company_id = d.company_id " - "GROUP BY co.company_id ORDER BY active_tickets DESC;"), - }, - { - "question": ("Retrieve all contacts who are assigned to a sales 
rep but have not " - "been contacted via email, phone, or meeting in the past three months."), - "sql": ("SELECT c.contact_id, c.first_name, c.last_name FROM contacts AS c " - "JOIN users AS u ON c.owner_id = u.user_id " - "LEFT JOIN emails AS e ON c.contact_id = e.contact_id " - "AND e.sent_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH) " - "LEFT JOIN phone_calls AS p ON c.contact_id = p.contact_id " - "AND p.call_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH) " - "LEFT JOIN meetings AS m ON c.contact_id = m.contact_id " - "AND m.meeting_date >= DATE_SUB(CURDATE(), INTERVAL 3 MONTH) " - "WHERE e.contact_id IS NULL AND p.contact_id IS NULL " - "AND m.contact_id IS NULL;"), - }, - { - "question": ("Which email campaigns resulted in the highest number of closed deals, " - "and what was the average deal size for those campaigns?"), - "sql": ("SELECT ec.campaign_id, ec.campaign_name, COUNT(d.deal_id) AS closed_deals, " - "AVG(d.amount) AS avg_deal_value FROM email_campaigns AS ec " - "JOIN contacts AS c ON ec.campaign_id = c.campaign_id " - "JOIN deals AS d ON c.contact_id = d.contact_id " - "JOIN deal_stages AS ds ON d.stage_id = ds.stage_id " - "WHERE ds.stage_name = 'Closed Won' GROUP BY ec.campaign_id " - "ORDER BY closed_deals DESC;"), - }, - { - "question": ("Find the average time it takes for a lead to go from creation to " - "conversion into a deal, broken down by industry."), - "sql": ("SELECT ind.industry_name, AVG(DATEDIFF(d.close_date, l.created_date)) " - "AS avg_conversion_time FROM leads AS l " - "JOIN companies AS co ON l.company_id = co.company_id " - "JOIN industries AS ind ON co.industry_id = ind.industry_id " - "JOIN opportunities AS o ON l.lead_id = o.lead_id " - "JOIN deals AS d ON o.opportunity_id = d.opportunity_id " - "WHERE d.stage_id IN (SELECT stage_id FROM deal_stages " - "WHERE stage_name = 'Closed Won') GROUP BY ind.industry_name " - "ORDER BY avg_conversion_time ASC;"), - }, - { - "question": ("Which sales reps (users) have the highest win 
rate, calculated as " - "the percentage of their assigned leads that convert into closed deals?"), - "sql": ("SELECT u.user_id, u.first_name, u.last_name, " - "COUNT(DISTINCT d.deal_id) / COUNT(DISTINCT l.lead_id) * 100 AS win_rate " - "FROM users AS u " - "JOIN leads AS l ON u.user_id = l.owner_id " - "LEFT JOIN opportunities AS o ON l.lead_id = o.lead_id " - "LEFT JOIN deals AS d ON o.opportunity_id = d.opportunity_id " - "JOIN deal_stages AS ds ON d.stage_id = ds.stage_id " - "WHERE ds.stage_name = 'Closed Won' GROUP BY u.user_id " - "ORDER BY win_rate DESC;"), - }, -] diff --git a/api/graph.py b/api/graph.py index 57330ce3..030305da 100644 --- a/api/graph.py +++ b/api/graph.py @@ -238,7 +238,7 @@ async def _find_connecting_tables( return result -async def find( +async def find( # pylint: disable=too-many-locals graph_id: str, queries_history: List[str], db_description: str = None @@ -258,10 +258,7 @@ async def find( user_query = queries_history[-1] previous_queries = queries_history[:-1] - logging.info( - "Calling LLM to find relevant tables/columns for query: %s", - user_query - ) + logging.info("Calling LLM to find relevant tables/columns for query") completion_result = completion( model=Config.COMPLETION_MODEL, @@ -343,7 +340,7 @@ def _get_unique_tables(tables_list): table_info[3] = [dict(od) for od in table_info[3]] table_info[2] = "Foreign keys: " + table_info[2] unique_tables[table_name] = table_info - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught print(f"Error: {table_info}, Exception: {e}") # Return the values (the unique table info lists) diff --git a/api/helpers/crm_data_generator.py b/api/helpers/crm_data_generator.py deleted file mode 100644 index c7a0b28f..00000000 --- a/api/helpers/crm_data_generator.py +++ /dev/null @@ -1,769 +0,0 @@ -""" -CRM data generator module for creating complete database schemas with relationships. 
- -This module provides functionality to generate comprehensive CRM database schemas -with proper primary/foreign key relationships and table structures. -""" - -import json -import os -import time -from typing import Any, Dict, List, Optional - -import requests -from litellm import completion - -OUTPUT_FILE = "complete_crm_schema.json" -MAX_RETRIES = 3 -RETRY_DELAY = 5 # seconds - -# Global registry to track primary and foreign keys across tables -key_registry = { - "primary_keys": {}, # table_name -> primary_key_column - "foreign_keys": {}, # table_name -> {column_name -> (referenced_table, referenced_column)} - "processed_tables": set(), # Set of tables that have been processed - "table_relationships": {}, # table_name -> set of related tables -} - - -def load_initial_schema(file_path: str) -> Dict[str, Any]: - """Load the initial schema file with table names""" - try: - with open(file_path, "r", encoding="utf-8") as file: - schema = json.load(file) - print(f"Loaded initial schema with {len(schema.get('tables', {}))} tables") - return schema - except Exception as e: - print(f"Error loading schema file: {e}") - return {"database": "crm_system", "tables": {}} - - -def save_schema(schema: Dict[str, Any], output_file: str = OUTPUT_FILE) -> None: - """Save the current schema to a file with metadata""" - # Add metadata - if "metadata" not in schema: - schema["metadata"] = {} - - schema["metadata"]["last_updated"] = time.strftime("%Y-%m-%d %H:%M:%S") - schema["metadata"]["completed_tables"] = len(key_registry["processed_tables"]) - schema["metadata"]["total_tables"] = len(schema.get("tables", {})) - schema["metadata"]["key_registry"] = { - "primary_keys": key_registry["primary_keys"], - "foreign_keys": key_registry["foreign_keys"], - "table_relationships": {k: list(v) for k, v in key_registry["table_relationships"].items()}, - } - - with open(output_file, "w", encoding="utf-8") as file: - json.dump(schema, file, indent=2) - print(f"Schema saved to {output_file}") - - 
-def update_key_registry(table_name: str, table_data: Dict[str, Any]) -> None: - """Update the key registry with information from a processed table""" - # Mark table as processed - key_registry["processed_tables"].add(table_name) - - # Track primary keys - if "columns" in table_data: - for col_name, col_data in table_data["columns"].items(): - if col_data.get("key") == "PRI": - key_registry["primary_keys"][table_name] = col_name - break - - # Track foreign keys and relationships - if "foreign_keys" in table_data: - if table_name not in key_registry["foreign_keys"]: - key_registry["foreign_keys"][table_name] = {} - - if table_name not in key_registry["table_relationships"]: - key_registry["table_relationships"][table_name] = set() - - for fk_data in table_data["foreign_keys"].values(): - column = fk_data.get("column") - ref_table = fk_data.get("referenced_table") - ref_column = fk_data.get("referenced_column") - - if column and ref_table and ref_column: - key_registry["foreign_keys"][table_name][column] = ( - ref_table, - ref_column, - ) - - # Update relationships - key_registry["table_relationships"][table_name].add(ref_table) - - # Ensure the referenced table has an entry - if ref_table not in key_registry["table_relationships"]: - key_registry["table_relationships"][ref_table] = set() - - # Add the reverse relationship - key_registry["table_relationships"][ref_table].add(table_name) - - -def find_related_tables(table_name: str, all_tables: List[str]) -> List[str]: - """Find tables that might be related to the current table""" - related = [] - - # Check registry first for already established relationships - if table_name in key_registry["table_relationships"]: - related.extend(key_registry["table_relationships"][table_name]) - - # Extract base name - base_parts = table_name.split("_") - - for other_table in all_tables: - if other_table == table_name or other_table in related: - continue - - # Direct naming relationship - if table_name in other_table or other_table 
in table_name: - related.append(other_table) - continue - - # Check for common roots - other_parts = other_table.split("_") - for part in base_parts: - if part in other_parts and len(part) > 3: # Avoid short common words - related.append(other_table) - break - - return list(set(related)) # Remove duplicates - - -def get_table_prompt( - table_name: str, schema: Dict[str, Any], all_table_names: List[str], topology -) -> str: - """Generate a prompt for the LLM to create a table schema with proper relationships""" - existing_tables = schema.get("tables", {}) - - # Find related tables - related_tables = find_related_tables(table_name, all_table_names) - related_tables_str = ", ".join(related_tables) if related_tables else "None identified yet" - - # # Suggest primary key pattern - # table_base = table_name.split("_")[0] if "_" in table_name else table_name - # suggested_pk = f"{table_name}_id" # Default pattern - - # # Check if related tables have primary keys to follow same pattern - # for related in related_tables: - # if related in key_registry["primary_keys"]: - # related_pk = key_registry["primary_keys"][related] - # if related_pk.endswith("_id") and related in related_pk: - # # Follow the same pattern - # suggested_pk = f"{table_name}_id" - # break - - # Prepare foreign key suggestions - fk_suggestions = [] - for related in related_tables: - if related in key_registry["primary_keys"]: - fk_suggestions.append( - { - "column": f"{related}_id", - "referenced_table": related, - "referenced_column": key_registry["primary_keys"][related], - } - ) - - fk_suggestions_str = "" - if fk_suggestions: - fk_suggestions_str = "Consider these foreign key relationships:\n" - for i, fk in enumerate(fk_suggestions[:5]): # Limit to 5 suggestions - fk_suggestions_str += ( - f"{i+1}. 
{fk['column']} -> {fk['referenced_table']}.{fk['referenced_column']}\n" - ) - - # Include examples of related tables that have been processed - related_examples = "" - example_count = 0 - for related in related_tables: - if ( - related in existing_tables - and isinstance(existing_tables[related], dict) - and "columns" in existing_tables[related] - and example_count < 2 - ): - related_examples += ( - f"\nRelated table example:\n```json\n" - f"{json.dumps({related: existing_tables[related]}, indent=2)}\n```\n" - ) - example_count += 1 - - # Use contacts table as primary example if no related examples found - contacts_example = """ -{ - "contacts": { - "description": ("Stores information about individual contacts within the CRM " - "system, including personal details and relationship to companies."), - "columns": { - "contact_id": { - "description": "Unique identifier for each contact", - "type": "int(11)", - "null": "NO", - "key": "PRI", - "default": null, - "extra": "auto_increment" - }, - "first_name": { - "description": "Contact's first name", - "type": "varchar(50)", - "null": "NO", - "key": "", - "default": null, - "extra": "" - }, - "email": { - "description": "Contact's primary email address", - "type": "varchar(100)", - "null": "NO", - "key": "UNI", - "default": null, - "extra": "" - }, - "company_id": { - "description": "Foreign key to the companies table", - "type": "int(11)", - "null": "YES", - "key": "MUL", - "default": null, - "extra": "" - }, - "created_date": { - "description": "Date and time when the contact was created", - "type": "timestamp", - "null": "NO", - "key": "", - "default": "CURRENT_TIMESTAMP", - "extra": "" - }, - "updated_date": { - "description": "Date and time when the contact was last updated", - "type": "timestamp", - "null": "YES", - "key": "", - "default": null, - "extra": "on update CURRENT_TIMESTAMP" - } - }, - "indexes": { - "PRIMARY": { - "columns": [ - { - "name": "contact_id", - "sub_part": null, - "seq_in_index": 1 - } - ], 
- "unique": true, - "type": "BTREE" - }, - "email_unique": { - "columns": [ - { - "name": "email", - "sub_part": null, - "seq_in_index": 1 - } - ], - "unique": true, - "type": "BTREE" - }, - "company_id_index": { - "columns": [ - { - "name": "company_id", - "sub_part": null, - "seq_in_index": 1 - } - ], - "unique": false, - "type": "BTREE" - } - }, - "foreign_keys": { - "fk_contacts_company": { - "column": "company_id", - "referenced_table": "companies", - "referenced_column": "company_id" - } - } - } -} -""" - # Create context about the table's purpose - table_context = get_table_context(table_name, related_tables) - keys = json.dumps(topology["tables"][table_name]) - prompt = f""" -You are an expert database architect specializing in CRM systems. Create a detailed -JSON schema for the '{table_name}' table in our CRM database. - -CONTEXT ABOUT THIS TABLE: -{table_context} - -POTENTIALLY RELATED TABLES: -{related_tables_str} - -The primary Key and the foreign keys (topology) for this table should include the following: -{keys} - -{fk_suggestions_str} - -Your response must include: -1. A comprehensive description of the table's purpose -2. All relevant columns with: - - Detailed descriptions - - Appropriate MySQL data types - - NULL/NOT NULL constraints - - Key designations (PRI, UNI, MUL, etc.) - - Default values - - Extra properties (auto_increment, on update, etc.) -3. All necessary indexes including: - - Primary key index - - Unique constraints - - Foreign key indexes - - Other performance indexes -4. All foreign key relationships with: - - Constraint names - - Referenced tables and columns -5. Ensure that you using the exact keys from the topology, PK is for primary key and FK is for foreign key. 
- -EXACTLY FOLLOW THIS FORMAT from our contacts table: -```json -{contacts_example} -``` -{related_examples} - -IMPORTANT GUIDELINES: -- Always include standard timestamps (created_date, updated_date) for all tables -- All tables should have a primary key with auto_increment -- Follow proper MySQL data type conventions -- Include appropriate indexes for performance -- Every column needs a description, type, null status -- All names should follow snake_case convention -- For many-to-many relationships, create appropriate junction tables -- Ensure referential integrity with foreign key constraints - -Return ONLY valid JSON for the '{table_name}' table structure without any -explanation or additional text: -{{ - "{table_name}": {{ - "description": "...", - "columns": {{...}}, - "indexes": {{...}}, - "foreign_keys": {{...}} - }} -}} -""" - return prompt - - -def get_table_context(table_name: str, related_tables: List[str]) -> str: - """Generate contextual information about a table based on its name and related tables""" - # Extract words from table name - words = table_name.replace("_", " ").split() - - # Common CRM entities - entities = { - "contact": "Contains information about individuals", - "company": "Contains information about organizations/businesses", - "lead": "Represents potential customers or sales opportunities", - "opportunity": "Represents qualified sales opportunities", - "deal": "Represents sales deals in progress or completed", - "task": "Represents activities or to-do items", - "meeting": "Contains information about scheduled meetings", - "call": "Contains information about phone calls", - "email": "Contains information about email communication", - "user": "Contains information about CRM system users", - "product": "Contains information about products or services", - "quote": "Contains information about price quotes", - "invoice": "Contains information about invoices", - "order": "Contains information about customer orders", - "subscription": 
"Contains information about recurring subscriptions", - "ticket": "Contains information about support tickets", - "campaign": "Contains information about marketing campaigns", - } - - # Common relationship patterns - relationship_patterns = { - "tags": "This is a tagging or categorization table that likely links to various entities", - "notes": "This contains notes or comments associated with other entities", - "addresses": "This contains address information associated with other entities", - "preferences": "This contains preference settings associated with other entities", - "relationships": "This defines relationships between entities", - "social": "This contains social media information", - "assignments": "This tracks assignment of entities to users", - "sources": "This tracks where entities originated from", - "statuses": "This defines possible status values for entities", - "types": "This defines type categories for entities", - "stages": "This defines stage progression for entities", - "logs": "This tracks history or logs of activities", - "attachments": "This contains file attachments", - "performance": "This tracks performance metrics", - "feedback": "This contains feedback information", - "settings": "This contains configuration settings", - } - - context = f"The '{table_name}' table appears to be " - - # Check if this is a junction/linking table - if "_" in table_name and not any(p in table_name for p in relationship_patterns): - parts = table_name.split("_") - if len(parts) == 2 and all(len(p) > 2 for p in parts): - return (f"This appears to be a junction table linking '{parts[0]}' and " - f"'{parts[1]}', likely with a many-to-many relationship.") - - # Check for main entities - for entity, description in entities.items(): - if entity in words: - context += f"{description}. " - break - else: - context += "part of the CRM system. 
" - - # Check for relationship patterns - for pattern, description in relationship_patterns.items(): - if pattern in table_name: - context += f"{description}. " - break - - # Add related tables info - if related_tables: - context += ( - f"It appears to be related to the following tables: {', '.join(related_tables)}. " - ) - - # Guess if it's a child table - for related in related_tables: - if related in table_name and len(related) < len(table_name): - context += f"It may be a child or detail table for the {related} table. " - break - - return context - - -def call_llm_api(prompt: str, retries: int = MAX_RETRIES) -> Optional[str]: - """Call the LLM API with the given prompt, with retry logic""" - for attempt in range(1, retries + 1): - try: - config = {} - config["temperature"] = 0.5 - config["response_format"] = {"type": "json_object"} - - response = completion( - model="gemini/gemini-2.0-flash", - messages=[{"role": "user", "content": prompt}], - **config, - ) - result = ( - response.json() - .get("choices", [{}])[0] - .get("message", "") - .get("content", "") - .strip() - ) - if result: - return result - - print(f"Empty response from API (attempt {attempt}/{retries})") - - except requests.exceptions.RequestException as e: - print(f"API request error (attempt {attempt}/{retries}): {e}") - - if attempt < retries: - sleep_time = RETRY_DELAY * attempt - print(f"Retrying in {sleep_time} seconds...") - time.sleep(sleep_time) - - print("All retry attempts failed") - return None - - -def parse_llm_response(response: str, table_name: str) -> Optional[Dict[str, Any]]: - """Parse the LLM response and extract the table schema with validation""" - try: - # Extract JSON from response if needed - if "```json" in response: - response = response.split("```json")[1].split("```")[0].strip() - elif "```" in response: - response = response.split("```")[1].strip() - - # Handle common formatting issues - response = response.replace("\n", " ").replace("\r", " ") - - # Cleanup any 
trailing/leading text - start_idx = response.find("{") - end_idx = response.rfind("}") + 1 - if 0 <= start_idx < end_idx: - response = response[start_idx:end_idx] - - parsed = json.loads(response) - - # Validation of required components - if table_name in parsed: - table_data = parsed[table_name] - required_keys = ["description", "columns", "indexes", "foreign_keys"] - - # Check if all required sections exist - if all(key in table_data for key in required_keys): - # Verify columns have required attributes - for col_name, col_data in table_data["columns"].items(): - required_col_attrs = ["description", "type", "null"] - if not all(attr in col_data for attr in required_col_attrs): - print(f"Warning: Column {col_name} is missing required attributes") - - return {table_name: table_data} - - missing = [key for key in required_keys if key not in table_data] - print(f"Warning: Table schema missing required sections: {missing}") - return {table_name: table_data} # Return anyway, but with warning - - # Try to get the first key if table_name is not found - first_key = next(iter(parsed)) - print(f"Warning: Table name mismatch. Expected {table_name}, got {first_key}") - return {table_name: parsed[first_key]} - except Exception as e: - print(f"Error parsing LLM response for {table_name}: {e}") - print(f"Raw response: {response[:500]}...") # Show first 500 chars - return None - - -def process_table( - table_name: str, schema: Dict[str, Any], all_table_names: List[str], topology -) -> Dict[str, Any]: - """Process a single table and update the schema""" - print(f"Processing table: {table_name}") - - # Skip if table already has detailed schema - if ( - table_name in schema["tables"] - and isinstance(schema["tables"][table_name], dict) - and "columns" in schema["tables"][table_name] - and "indexes" in schema["tables"][table_name] - and "foreign_keys" in schema["tables"][table_name] - ): - print(f"Table {table_name} already processed. 
Skipping.") - return schema - - # Generate prompt for this table - prompt = get_table_prompt(table_name, schema["tables"], all_table_names, topology) - - # Call LLM API - response = call_llm_api(prompt) - if not response: - print(f"Failed to get response for {table_name}. Skipping.") - return schema - - # Parse response - table_schema = parse_llm_response(response, table_name) - if not table_schema: - print(f"Failed to parse response for {table_name}. Skipping.") - return schema - - # Update schema - schema["tables"].update(table_schema) - print(f"Successfully processed {table_name}") - - # Save intermediate results - save_schema(schema, f"intermediate_{table_name.replace('/', '_')}.json") - - return schema - - -def main(): - """Main function to generate complete CRM schema with relationships.""" - # Load the initial schema with table names - initial_schema_path = "examples/crm_tables.json" # Replace with your actual file path - initial_schema = load_initial_schema(initial_schema_path) - - # Get the list of tables to process - tables = list(initial_schema.get("tables", {}).keys()) - all_table_names = tables.copy() # Keep a full list for reference - - topology = generate_keys(tables) - - # Initialize our working schema - schema = {"database": initial_schema.get("database", "crm_system"), "tables": {}} - - # If we have existing work, load it - if os.path.exists(OUTPUT_FILE): - try: - with open(OUTPUT_FILE, "r", encoding="utf-8") as file: - schema = json.load(file) - print(f"Loaded existing schema from {OUTPUT_FILE}") - except Exception as e: - print(f"Error loading existing schema: {e}") - - # Prioritize tables to process - process base tables first - def table_priority(table_name): - # Base tables should be processed first - if "_" not in table_name: - return 0 - # Junction tables last - if table_name.count("_") > 1: - return 2 - # Related tables in the middle - return 1 - - # Sort tables by priority - tables.sort(key=table_priority) - - # Process tables - for i, 
table_name in enumerate(tables): - print( - f"\nProcessing table {i+1}/{len(tables)}: {table_name} " - f"(Priority: {table_priority(table_name)})" - ) - schema = process_table(table_name, schema, all_table_names, topology) - - # Save progress after each table - save_schema(schema) - - # Add delay to avoid rate limits - if i < len(tables) - 1: - delay = 2 + (0.5 * i % 5) # Varied delay to help avoid pattern detection - print(f"Waiting {delay} seconds before next request...") - time.sleep(delay) - - print(f"\nCompleted processing all {len(tables)} tables") - print(f"Final schema saved to {OUTPUT_FILE}") - - # Validate the final schema - validate_schema(schema) - - -def generate_keys(tables) -> Dict[str, Any]: - """Generate primary and foreign keys for CRM tables.""" - path = "examples/crm_topology.json" - last_key = 0 # Initialize default value - schema = {"tables": {}} # Initialize default schema - - # If we have existing work, load it - if os.path.exists(path): - try: - with open(path, "r", encoding="utf-8") as file: - schema = json.load(file) - if schema.get("tables"): - last_key = tables.index(list(schema["tables"].keys())[-1]) - print(f"Loaded existing schema from {path}") - except Exception as e: - print(f"Error loading existing schema: {e}") - last_key = 0 - - prompt = """ - You are an expert database architect specializing in CRM systems. Create a detailed JSON schema for the '{table_name}' table in our CRM database. - The all tables are: - {tables} - - Please genereate the primary key and foreign key for the table in the following json format: - "contacts": {{ - "contact_id": "PK", - "company_id": "FK", - "user_id": "FK", - "lead_id": "FK" - }}, - - - Only generate the primery key and the foreign keys based on you knowledge on crm databases in the above schema. 
- Your output for the table '{table_name}': - """ - for table in tables[last_key:]: - - p = prompt.format(table_name=table, tables=tables) - response = call_llm_api(p) - new_table = json.loads(response) - schema["tables"].update(new_table) - - with open(path, "w", encoding="utf-8") as file: - json.dump(schema, file, indent=2) - print(f"Schema saved to {path}") - print(f"Final schema saved to {path}") - - return schema - - -def validate_schema(schema: Dict[str, Any]) -> None: - """Perform final validation on the complete schema""" - print("\nValidating schema...") - issues = [] - - table_count = len(schema["tables"]) - tables_with_columns = sum( - 1 for t in schema["tables"].values() if isinstance(t, dict) and "columns" in t - ) - tables_with_indexes = sum( - 1 for t in schema["tables"].values() if isinstance(t, dict) and "indexes" in t - ) - tables_with_foreign_keys = sum( - 1 for t in schema["tables"].values() if isinstance(t, dict) and "foreign_keys" in t - ) - - print(f"Total tables: {table_count}") - print(f"Tables with columns: {tables_with_columns}") - print(f"Tables with indexes: {tables_with_indexes}") - print(f"Tables with foreign keys: {tables_with_foreign_keys}") - - # Check if all tables have required sections - incomplete_tables = [] - for table_name, table_data in schema["tables"].items(): - if not isinstance(table_data, dict): - incomplete_tables.append(f"{table_name} (empty)") - continue - - missing = [] - if "description" not in table_data or not table_data["description"]: - missing.append("description") - if "columns" not in table_data or not table_data["columns"]: - missing.append("columns") - if "indexes" not in table_data or not table_data["indexes"]: - missing.append("indexes") - if "foreign_keys" not in table_data: # Can be empty, just needs to exist - missing.append("foreign_keys") - - if missing: - incomplete_tables.append(f"{table_name} (missing: {', '.join(missing)})") - - if incomplete_tables: - issues.append(f"Incomplete tables: 
{len(incomplete_tables)}") - print("Incomplete tables:") - for table in incomplete_tables[:10]: # Show first 10 - print(f" - {table}") - if len(incomplete_tables) > 10: - print(f" ... and {len(incomplete_tables) - 10} more") - - # Check foreign key references - invalid_fks = [] - for table_name, table_data in schema["tables"].items(): - if not isinstance(table_data, dict) or "foreign_keys" not in table_data: - continue - - for fk_name, fk_data in table_data["foreign_keys"].items(): - ref_table = fk_data.get("referenced_table") - ref_column = fk_data.get("referenced_column") - - if ref_table and ref_table not in schema["tables"]: - invalid_fks.append(f"{table_name}.{fk_name} -> {ref_table} (table not found)") - elif ref_table and ref_column: - ref_table_data = schema["tables"].get(ref_table, {}) - if not isinstance(ref_table_data, dict) or "columns" not in ref_table_data: - invalid_fks.append(f"{table_name}.{fk_name} -> {ref_table} (no columns)") - elif ref_column not in ref_table_data.get("columns", {}): - invalid_fks.append( - f"{table_name}.{fk_name} -> {ref_table}.{ref_column} (column not found)" - ) - - if invalid_fks: - issues.append(f"Invalid foreign keys: {len(invalid_fks)}") - print("Invalid foreign keys:") - for fk in invalid_fks[:10]: # Show first 10 - print(f" - {fk}") - if len(invalid_fks) > 10: - print(f" ... and {len(invalid_fks) - 10} more") - - if issues: - print(f"\nValidation complete. Found {len(issues)} issue types.") - else: - print("\nValidation complete. 
No issues found!") - - -if __name__ == "__main__": - main() diff --git a/api/loaders/__init__.py b/api/loaders/__init__.py new file mode 100644 index 00000000..49f80a74 --- /dev/null +++ b/api/loaders/__init__.py @@ -0,0 +1 @@ +"""Database loaders for QueryWeaver.""" diff --git a/api/loaders/base_loader.py b/api/loaders/base_loader.py index 735a9a57..91141606 100644 --- a/api/loaders/base_loader.py +++ b/api/loaders/base_loader.py @@ -1,16 +1,92 @@ """Base loader module providing abstract base class for data loaders.""" -from abc import ABC -from typing import Tuple +from abc import ABC, abstractmethod +from typing import AsyncGenerator, List, Any, Tuple, TYPE_CHECKING +from api.config import Config class BaseLoader(ABC): """Abstract base class for data loaders.""" @staticmethod - async def load(_graph_id: str, _data) -> Tuple[bool, str]: + @abstractmethod + async def load(_graph_id: str, _data) -> AsyncGenerator[tuple[bool, str], None]: """ Load the graph data into the database. This method must be implemented by any subclass. """ - return False, "Not implemented" + # This method is intended to be implemented by subclasses as an + # async generator (using `yield`). Including a `yield` inside a + # `if TYPE_CHECKING` block makes the function an async generator + # for static type checkers (mypy) while having no runtime effect. + if TYPE_CHECKING: # pragma: no cover - only for type checking + yield True, "" + + @staticmethod + @abstractmethod + def _execute_count_query(cursor, table_name: str, col_name: str) -> Tuple[int, int]: + """ + Execute query to get total count and distinct count for a column. + + Args: + cursor: Database cursor + table_name: Name of the table + col_name: Name of the column + + Returns: + Tuple of (total_count, distinct_count) + """ + + @staticmethod + @abstractmethod + def _execute_distinct_query(cursor, table_name: str, col_name: str) -> List[Any]: + """ + Execute query to get distinct values for a column. 
+ + Args: + cursor: Database cursor + table_name: Name of the table + col_name: Name of the column + + Returns: + List of distinct values + """ + + @classmethod + def extract_distinct_values_for_column( + cls, cursor, table_name: str, col_name: str + ) -> List[str]: + """ + Extract distinct values for a column if it meets the criteria for inclusion. + + Args: + cursor: Database cursor + table_name: Name of the table + col_name: Name of the column + + Returns: + List of formatted distinct values to add to description, or empty list + """ + # Get row counts using database-specific implementation + rows_count, distinct_count = cls._execute_count_query( + cursor, table_name, col_name + ) + + max_distinct = Config.DB_MAX_DISTINCT + uniqueness_threshold = Config.DB_UNIQUENESS_THRESHOLD + + if 0 < distinct_count < max_distinct and distinct_count < ( + uniqueness_threshold * rows_count + ): + # Get distinct values using database-specific implementation + distinct_values = cls._execute_distinct_query(cursor, table_name, col_name) + + if distinct_values: + # Check first value type to avoid objects like dict/bytes + first_val = distinct_values[0] + if isinstance(first_val, (str, int)): + return [ + f"(Optional values: {', '.join(f'({str(v)})' for v in distinct_values)})" + ] + + return [] diff --git a/api/loaders/csv_loader.py b/api/loaders/csv_loader.py deleted file mode 100644 index c65cae30..00000000 --- a/api/loaders/csv_loader.py +++ /dev/null @@ -1,433 +0,0 @@ -"""CSV loader module for processing CSV files and generating database schemas.""" - -import io -from collections import defaultdict -from typing import Tuple - -import tqdm - -from api.loaders.base_loader import BaseLoader -from api.loaders.graph_loader import load_to_graph - - -class CSVLoader(BaseLoader): - """CSV data loader for processing CSV files and loading them into graph database.""" - - @staticmethod - async def load(graph_id: str, data) -> Tuple[bool, str]: - """ - Load the data dictionary CSV file 
into the graph database. - - Args: - graph_id: The ID of the graph to load the data into - data: CSV file - - Returns: - Tuple of (success, message) - """ - raise NotImplementedError("CSVLoader is not implemented yet") - import pandas as pd - - try: - # Parse CSV data using pandas for better handling of large files - df = pd.read_csv(io.StringIO(data), encoding="utf-8") - - # Check if required columns exist - required_columns = [ - "Schema", - "Domain", - "Field", - "Type", - "Description", - "Related", - "Cardinality", - ] - missing_columns = [col for col in required_columns if col not in df.columns] - - if missing_columns: - return ( - False, - f"Missing required columns in CSV: {', '.join(missing_columns)}", - ) - - db_name = """Abacus Domain Model 25.3.5 -The Abacus Domain Model is a physical manifestation of the hierarchical object model that Abacus Insights uses to store data. (It is not a relational database.) It is a foundational aspect of -the Abacus Insights Platform, interacting with data ingestion, consumption, and data management. The domain model will continue to evolve with the addition of new data sources and -connectors. -The Abacus Domain Model is organized into schemas, which group related domains. We implement each domain as a broad structure with minimal nesting. 
The model avoids inheritance and -deep nesting to minimize complexity and optimize performance.""" - - # Process data by grouping by Schema and Domain to identify tables - # Group by Schema and Domain to get tables - tables = defaultdict( - lambda: { - "description": "", - "columns": {}, - # 'relationships': [], - "col_descriptions": [], - } - ) - - rel_table = defaultdict(lambda: {"primary_key_table": "", "fk_tables": []}) - relationships = {} - # First pass: Organize data into tables - for idx, row in tqdm.tqdm(df.iterrows(), total=len(df), desc="Organizing data"): - schema = row["Schema"] - domain = row["Domain"] - - table_name = f"{schema}.{domain}" - - # Set table description (use Domain Description if available) - if ( - "Domain Description" in row - and not pd.isna(row["Domain Description"]) - and not tables[table_name]["description"] - ): - tables[table_name]["description"] = row["Domain Description"] - - # Add column information - field = row["Field"] - field_type = row["Type"] if not pd.isna(row["Type"]) else "STRING" - field_desc = row["Description"] if not pd.isna(row["Description"]) else field - - nullable = True # Default to nullable since we don't have explicit null info - if not pd.isna(field): - tables[table_name]["col_descriptions"].append(field_desc) - tables[table_name]["columns"][field] = { - "type": field_type, - "description": field_desc, - "null": nullable, - "key": ( - "PRI" if field.lower().endswith("_id") else "" - ), # Assumption: *_id fields are primary keys - "default": "", - "extra": "", - } - - # Add relationship information if available - if not pd.isna(row["Related"]) and not pd.isna(row["Cardinality"]): - source_field = field - target_table = row["Related"] - # cardinality = row['Cardinality'] - if table_name not in relationships: - relationships[table_name] = [] - relationships[table_name].append( - { - "from": table_name, - "to": target_table, - "source_column": source_field, - "target_column": ( - df.to_dict("records")[idx + 
1]["Array Field"] - if not pd.isna(df.to_dict("records")[idx + 1]["Array Field"]) - else "" - ), - "note": "", - } - ) - - # tables[table_name]['relationships'].append({ - # 'source_field': source_field, - # 'target_table': target_table, - # 'cardinality': cardinality, - # 'target_field': df.to_dict("records")[idx+1]['Array Field'] \ - # if not pd.isna(df.to_dict("records")[idx+1] \ - # ['Array Field']) else '' - # }) - tables[target_table]["description"] = field_desc - - else: - field = row["Array Field"] - field_desc = field_desc if not pd.isna(field_desc) else field - # if len(tables[target_table]['col_descriptions']) == 0: - # tables[table_name]['relationships'][-1]['target_field'] = field - tables[target_table]["col_descriptions"].append(field_desc) - tables[target_table]["columns"][field] = { - "type": field_type, - "description": field_desc, - "null": nullable, - "key": ( - "PRI" if field.lower().endswith("_id") else "" - ), # Assumption: *_id fields are primary keys - "default": "", - "extra": "", - } - if field.endswith("_id"): - if len(tables[table_name]["columns"]) == 1 and field.endswith("_id"): - suspected_primary_key = field[:-3] - if suspected_primary_key in domain: - rel_table[field]["primary_key_table"] = table_name - else: - rel_table[field]["fk_tables"].append(table_name) - else: - rel_table[field]["fk_tables"].append(table_name) - - for key, tables_info in tqdm.tqdm( - rel_table.items(), desc="Creating relationships from names" - ): - if len(tables_info["fk_tables"]) > 0: - fk_tables = list(set(tables_info["fk_tables"])) - if len(tables_info["primary_key_table"]) > 0: - for table in fk_tables: - if table not in relationships: - relationships[table_name] = [] - relationships[table].append( - { - "from": table, - "to": tables_info["primary_key_table"], - "source_column": key, - "target_column": key, - "note": "many-one", - } - ) - else: - for table_1 in fk_tables: - for table_2 in fk_tables: - if table_1 != table_2: - if table_1 not in 
relationships: - relationships[table_1] = [] - relationships[table_1].append( - { - "from": table_1, - "to": table_2, - "source_column": key, - "target_column": key, - "note": "many-many", - } - ) - - await load_to_graph(graph_id, tables, relationships, db_name=db_name) - return True, "Data dictionary loaded successfully into graph" - - except Exception as e: - return False, f"Error loading CSV: {str(e)}" - # else: - # # For case 2: when no primary key table exists, \ - # # connect all FK tables to each other - # graph.query( - # """ - # CREATE (src: Column {name: $col, cardinality: $cardinality}) - # """, - # { - # 'col': key, - # 'cardinality': 'many-many' - # } - # ) - # for i in range(len(fk_tables)): - # graph.query( - # """ - # MATCH (src:Column {name: $source_col}) - # -[:BELONGS_TO]->(source:Table {name: $source_table}) - # MATCH (tgt:Column {name: $target_col, cardinality: $cardinality}) - # CREATE (src)-[:REFERENCES { - # constraint_name: $fk_name, - # cardinality: $cardinality - # }]->(tgt) - # """, - # { - # 'source_col': key, - # 'target_col': key, - # 'source_table': fk_tables[i], - # 'fk_name': key, - # 'cardinality': 'many-many' - # } - # ) - - -# # Second pass: Create table nodes -# for table_name, table_info in tqdm.tqdm(tables.items(), desc="Creating Table nodes"): -# # Skip if no columns (probably just a reference) -# if not table_info['columns']: -# continue - -# # Generate embedding for table description -# table_desc = table_info['description'] -# embedding_result = client.models.embed_content( -# model="text-embedding-004", -# contents=[table_desc if table_desc else table_name], -# ) - -# # Create table node -# graph.query( -# """ -# CREATE (t:Table { -# name: $table_name, -# description: $description, -# embedding: vecf32($embedding) -# }) -# """, -# { -# 'table_name': table_name, -# 'description': table_desc, -# 'embedding': embedding_result.embeddings[0].values -# } -# ) -# try: -# embed_columns = [] -# batch_size = 50 -# col_descriptions 
= table_info['col_descriptions'] -# for batch in tqdm.tqdm( -# [col_descriptions[i:i + batch_size] \ -# for i in range(0, len(col_descriptions), batch_size)], -# desc=f"Creating embeddings for {table_name}"): - -# embedding_result = embedding( -# model='bedrock/cohere.embed-english-v3', -# input=batch[:95], -# aws_profile_name=Config.AWS_PROFILE, -# aws_region_name=Config.AWS_REGION) -# embed_columns.extend([emb.values for emb in embedding_result.embeddings]) -# except Exception as e: -# print(f"Error creating embeddings: {str(e)}") - -# # Create column nodes -# for idx, (col_name, col_info) in tqdm.tqdm( -# enumerate(table_info['columns'].items()), -# desc=f"Creating columns for {table_name}", -# total=len(table_info['columns'])): -# # embedding_result = embedding( -# # model=Config.EMBEDDING_MODEL, -# # input=[col_info['description'] if col_info['description'] else col_name] -# # ) - -# ## Temp -# # agent_tax = TaxonomyAgent() -# # tax = agent_tax.get_answer(col_name, col_info) -# # # -# graph.query( -# """ -# MATCH (t:Table {name: $table_name}) -# CREATE (c:Column { -# name: $col_name, -# type: $type, -# nullable: $nullable, -# key_type: $key, -# default_value: $default, -# extra: $extra, -# description: $description, -# embedding: vecf32($embedding) -# })-[:BELONGS_TO]->(t) -# """, -# { -# 'table_name': table_name, -# 'col_name': col_name, -# 'type': col_info['type'], -# 'nullable': col_info['null'], -# 'key': col_info['key'], -# 'default': col_info['default'], -# 'extra': col_info['extra'], -# 'description': col_info['description'], -# 'embedding': embed_columns[idx] -# } -# ) - -# # Third pass: Create relationships -# for table_name, table_info in tqdm.tqdm(tables.items(), \ -# desc="Creating relationships"): -# for rel in table_info['relationships']: -# source_field = rel['source_field'] -# target_table = rel['target_table'] -# cardinality = rel['cardinality'] -# target_field = rel['target_field'] # \ -# # list(tables[tables[table_name]['relationships'][-1] 
\ -# # ['target_table']]['columns'].keys())[0] -# # Create constraint name -# constraint_name = ( -# f"fk_{table_name.replace('.', '_')}_{source_field}_to_" -# f"{target_table.replace('.', '_')}" -# ) - -# # Create relationship if both tables and columns exist -# try: -# graph.query( -# """ -# MATCH (src:Column {name: $source_col}) -# -[:BELONGS_TO]->(source:Table {name: $source_table}) -# MATCH (tgt:Column {name: $target_col}) -# -[:BELONGS_TO]->(target:Table {name: $target_table}) -# CREATE (src)-[:REFERENCES { -# constraint_name: $fk_name, -# cardinality: $cardinality -# }]->(tgt) -# """, -# { -# 'source_col': source_field, -# 'target_col': target_field, -# 'source_table': table_name, -# 'target_table': target_table, -# 'fk_name': constraint_name, -# 'cardinality': cardinality -# } -# ) -# except Exception as e: -# print(f"Warning: Could not create relationship: {str(e)}") -# continue -# for key, tables_info in tqdm.tqdm(rel_table.items(), \ -# desc="Creating relationships from names"): -# if len(tables_info['fk_tables']) > 0: -# fk_tables = list(set(tables_info['fk_tables'])) -# if len(tables_info['primary_key_table']) > 0: -# for table in fk_tables: -# graph.query( -# """ -# MATCH (src:Column {name: $source_col}) -# -[:BELONGS_TO]->(source:Table {name: $source_table}) -# MATCH (tgt:Column {name: $target_col}) -# -[:BELONGS_TO]->(target:Table {name: $target_table}) -# CREATE (src)-[:REFERENCES { -# constraint_name: $fk_name, -# cardinality: $cardinality -# }]->(tgt) -# """, -# { -# 'source_col': key, -# 'target_col': key, -# 'source_table': table, -# 'target_table': tables_info['primary_key_table'], -# 'fk_name': key, -# 'cardinality': 'many-one' -# } -# ) -# else: -# # For case 2: when no primary key table exists, \ -# # connect all FK tables to each other -# graph.query( -# """ -# CREATE (src: Column {name: $col, cardinality: $cardinality}) -# """, -# { -# 'col': key, -# 'cardinality': 'many-many' -# } -# ) -# for i in range(len(fk_tables)): -# graph.query( 
-# """ -# MATCH (src:Column {name: $source_col}) -# -[:BELONGS_TO]->(source:Table {name: $source_table}) -# MATCH (tgt:Column {name: $target_col, cardinality: $cardinality}) -# CREATE (src)-[:REFERENCES { -# constraint_name: $fk_name, -# cardinality: $cardinality -# }]->(tgt) -# """, -# { -# 'source_col': key, -# 'target_col': key, -# 'source_table': fk_tables[i], -# 'fk_name': key, -# 'cardinality': 'many-many' -# } -# ) - -# load_to_graph(graph_id, entities, relationships, db_name="ERP system") -# return True, "Data dictionary loaded successfully into graph" - -# except Exception as e: -# return False, f"Error loading CSV: {str(e)}" - - -# if __name__ == "__main__": -# # Example usage -# loader = CSVLoader() -# success, message = loader.load("my_graph", "Data Dictionary.csv") -# print(message) diff --git a/api/loaders/graph_loader.py b/api/loaders/graph_loader.py index c1de42fd..d67b06a4 100644 --- a/api/loaders/graph_loader.py +++ b/api/loaders/graph_loader.py @@ -9,7 +9,7 @@ from api.utils import generate_db_description -async def load_to_graph( +async def load_to_graph( # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals graph_id: str, entities: dict, relationships: dict, @@ -49,7 +49,7 @@ async def load_to_graph( {"size": vec_len}, ) await graph.query("CREATE INDEX FOR (p:Table) ON (p.name)") - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught print(f"Error creating vector indices: {str(e)}") db_des = generate_db_description(db_name=db_name, table_names=list(entities.keys())) @@ -88,7 +88,7 @@ async def load_to_graph( ) # Batch embeddings for table columns - # TODO: Check if the embedding model and description are correct \ + # TODO: Check if the embedding model and description are correct # pylint: disable=fixme # (without 2 sources of truth) batch_flag = True col_descriptions = table_info.get("col_descriptions") @@ -107,7 +107,7 @@ async def load_to_graph( embedding_result = 
embedding_model.embed(batch) embed_columns.extend(embedding_result) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught print(f"Error creating embeddings: {str(e)}") batch_flag = False @@ -179,6 +179,6 @@ async def load_to_graph( "note": note, }, ) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught print(f"Warning: Could not create relationship: {str(e)}") continue diff --git a/api/loaders/json_loader.py b/api/loaders/json_loader.py deleted file mode 100644 index 3f7dd521..00000000 --- a/api/loaders/json_loader.py +++ /dev/null @@ -1,71 +0,0 @@ -"""JSON loader module for processing JSON schema files.""" - -import json -from typing import Tuple - -import tqdm -from jsonschema import ValidationError - -from api.config import Config -from api.loaders.base_loader import BaseLoader -from api.loaders.graph_loader import load_to_graph -from api.loaders.schema_validator import validate_table_schema - -try: - with open(Config.SCHEMA_PATH, "r", encoding="utf-8") as f: - schema = json.load(f) -except FileNotFoundError as exc: - raise FileNotFoundError(f"Schema file not found: {Config.SCHEMA_PATH}") from exc -except json.JSONDecodeError as exc: - raise ValueError(f"Invalid schema JSON: {str(exc)}") from exc - - -class JSONLoader(BaseLoader): - """JSON schema loader for loading database schemas from JSON files.""" - - @staticmethod - async def load(graph_id: str, data) -> Tuple[bool, str]: - """ - Load the graph data into the database. 
- It gets the Graph name as an argument and expects - a JSON payload with the following structure: txt2sql/schema_schema.json - """ - - # Validate the JSON with the schema should return a bad request if the payload is not valid - try: - validation_errors = validate_table_schema(data) - if not validation_errors: - print("✅ Schema is valid.") - else: - print("❌ Schema validation failed with the following issues:") - for error in validation_errors: - print(f" - {error}") - raise ValidationError( - "Schema validation failed. Please check the schema and try again." - ) - - except ValidationError as exc: - return False, str(exc) - - relationships = {} - for table_name, table_info in tqdm.tqdm( - data["tables"].items(), "Create Table relationships" - ): - # Create Foreign Key relationships - for fk_name, fk_info in tqdm.tqdm( - table_info["foreign_keys"].items(), "Create Foreign Key relationships" - ): - if table_name not in relationships: - relationships[table_name] = [] - relationships[table_name].append( - { - "from": table_name, - "to": fk_info["referenced_table"], - "source_column": fk_info["column"], - "target_column": fk_info["referenced_column"], - "note": fk_name, - } - ) - await load_to_graph(graph_id, data["tables"], relationships, db_name=data["database"]) - - return True, "Graph loaded successfully" diff --git a/api/loaders/mysql_loader.py b/api/loaders/mysql_loader.py index 5c22c03a..6ec40a06 100644 --- a/api/loaders/mysql_loader.py +++ b/api/loaders/mysql_loader.py @@ -4,7 +4,7 @@ import decimal import logging import re -from typing import Tuple, Dict, Any, List +from typing import AsyncGenerator, Tuple, Dict, Any, List import tqdm import pymysql @@ -14,6 +14,14 @@ from api.loaders.base_loader import BaseLoader from api.loaders.graph_loader import load_to_graph + +class MySQLQueryError(Exception): + """Exception raised for MySQL query execution errors.""" + + +class MySQLConnectionError(Exception): + """Exception raised for MySQL connection errors.""" + 
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") @@ -22,13 +30,13 @@ class MySQLLoader(BaseLoader): Loader for MySQL databases that connects and extracts schema information. """ - # DDL operations that modify database schema + # DDL operations that modify database schema # pylint: disable=duplicate-code SCHEMA_MODIFYING_OPERATIONS = { 'CREATE', 'ALTER', 'DROP', 'RENAME', 'TRUNCATE' } # More specific patterns for schema-affecting operations - SCHEMA_PATTERNS = [ + SCHEMA_PATTERNS = [ # pylint: disable=duplicate-code r'^\s*CREATE\s+TABLE', r'^\s*CREATE\s+INDEX', r'^\s*CREATE\s+UNIQUE\s+INDEX', @@ -45,6 +53,35 @@ class MySQLLoader(BaseLoader): r'^\s*DROP\s+SCHEMA', ] + @staticmethod + def _execute_count_query(cursor, table_name: str, col_name: str) -> Tuple[int, int]: + """ + Execute query to get total count and distinct count for a column. + MySQL implementation returning counts from dictionary-style results. + """ + query = f""" + SELECT COUNT(*) AS total_count, + COUNT(DISTINCT `{col_name}`) AS distinct_count + FROM `{table_name}`; + """ + + cursor.execute(query) + output = cursor.fetchall() + first_result = output[0] + return first_result['total_count'], first_result['distinct_count'] + + @staticmethod + def _execute_distinct_query(cursor, table_name: str, col_name: str) -> List[Any]: + """ + Execute query to get distinct values for a column. + MySQL implementation handling dictionary-style results. 
+ """ + query = f"SELECT DISTINCT `{col_name}` FROM `{table_name}`;" + cursor.execute(query) + + distinct_results = cursor.fetchall() + return [row[col_name] for row in distinct_results if row[col_name] is not None] + @staticmethod def _serialize_value(value): """ @@ -62,10 +99,9 @@ def _serialize_value(value): return value.isoformat() if isinstance(value, decimal.Decimal): return float(value) - elif value is None: + if value is None: return None - else: - return value + return value @staticmethod def _parse_mysql_url(connection_url: str) -> Dict[str, str]: @@ -125,7 +161,7 @@ def _parse_mysql_url(connection_url: str) -> Dict[str, str]: } @staticmethod - async def load(prefix: str, connection_url: str) -> Tuple[bool, str]: + async def load(prefix: str, connection_url: str) -> AsyncGenerator[tuple[bool, str], None]: """ Load the graph data from a MySQL database into the graph database. @@ -148,9 +184,11 @@ async def load(prefix: str, connection_url: str) -> Tuple[bool, str]: db_name = conn_params['database'] # Get all table information + yield True, "Extracting table information..." entities = MySQLLoader.extract_tables_info(cursor, db_name) # Get all relationship information + yield True, "Extracting relationship information..." relationships = MySQLLoader.extract_relationships(cursor, db_name) # Close database connection @@ -158,16 +196,19 @@ async def load(prefix: str, connection_url: str) -> Tuple[bool, str]: conn.close() # Load data into graph + yield True, "Loading data into graph..." await load_to_graph(f"{prefix}_{db_name}", entities, relationships, db_name=db_name, db_url=connection_url) - return True, (f"MySQL schema loaded successfully. " + yield True, (f"MySQL schema loaded successfully. 
" f"Found {len(entities)} tables.") except pymysql.MySQLError as e: - return False, f"MySQL connection error: {str(e)}" - except Exception as e: - return False, f"Error loading MySQL schema: {str(e)}" + logging.error("MySQL connection error: %s", e) + yield False, f"MySQL connection error: {str(e)}" + except Exception as e: # pylint: disable=broad-exception-caught + logging.error("Error loading MySQL schema: %s", e) + yield False, f"Error loading MySQL schema: {str(e)}" @staticmethod def extract_tables_info(cursor, db_name: str) -> Dict[str, Any]: @@ -283,6 +324,12 @@ def extract_columns_info(cursor, db_name: str, table_name: str) -> Dict[str, Any if column_default is not None: description_parts.append(f"(Default: {column_default})") + # Add distinct values if applicable + distinct_values_desc = MySQLLoader.extract_distinct_values_for_column( + cursor, table_name, col_name + ) + description_parts.extend(distinct_values_desc) + columns_info[col_name] = { 'type': data_type, 'null': is_nullable, @@ -416,10 +463,10 @@ async def refresh_graph_schema(graph_id: str, db_url: str) -> Tuple[bool, str]: Tuple of (success, message) """ try: - logging.info("Schema modification detected. Refreshing graph schema for: %s", graph_id) + logging.info("Schema modification detected. 
Refreshing graph schema.") # Import here to avoid circular imports - from api.extensions import db + from api.extensions import db # pylint: disable=import-error,import-outside-toplevel # Clear existing graph data # Drop current graph before reloading @@ -442,10 +489,10 @@ async def refresh_graph_schema(graph_id: str, db_url: str) -> Tuple[bool, str]: logging.info("Graph schema refreshed successfully.") return True, message - logging.error("Schema refresh failed for graph %s: %s", graph_id, message) + logging.error("Schema refresh failed") return False, "Failed to reload schema" - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught # Log the error and return failure logging.error("Error refreshing graph schema: %s", str(e)) error_msg = "Error refreshing graph schema" @@ -471,7 +518,7 @@ def execute_sql_query(sql_query: str, db_url: str) -> List[Dict[str, Any]]: # Connect to MySQL database conn = pymysql.connect(**conn_params) - cursor = conn.cursor(dictionary=True) + cursor = conn.cursor(DictCursor) # Execute the SQL query cursor.execute(sql_query) @@ -522,17 +569,13 @@ def execute_sql_query(sql_query: str, db_url: str) -> List[Dict[str, Any]]: conn.rollback() cursor.close() conn.close() - except pymysql.MySQLError as e: - # Rollback in case of error - if 'conn' in locals(): - conn.rollback() - cursor.close() - conn.close() - raise Exception(f"MySQL query execution error: {str(e)}") from e + logging.error("MySQL query execution error: %s", e) + raise MySQLQueryError(f"MySQL query execution error: {str(e)}") from e except Exception as e: # Rollback in case of error if 'conn' in locals(): conn.rollback() cursor.close() conn.close() - raise Exception(f"Error executing SQL query: {str(e)}") from e + logging.error("Error executing SQL query: %s", e) + raise MySQLQueryError(f"Error executing SQL query: {str(e)}") from e diff --git a/api/loaders/odata_loader.py b/api/loaders/odata_loader.py deleted file mode 100644 index 
5558c878..00000000 --- a/api/loaders/odata_loader.py +++ /dev/null @@ -1,152 +0,0 @@ -import re -import xml.etree.ElementTree as ET -from typing import Tuple - -import tqdm - -from api.loaders.base_loader import BaseLoader -from api.loaders.graph_loader import load_to_graph - - -class ODataLoader(BaseLoader): - """ - This class is responsible for loading OData schemas into a Graph. - """ - - @staticmethod - async def load(graph_id: str, data) -> Tuple[bool, str]: - """Load XML ODATA schema into a Graph.""" - - try: - # Parse the OData schema - entities, relationships = ODataLoader._parse_odata_schema(data) - except ET.ParseError: - return False, "Invalid XML content" - - await load_to_graph(graph_id, entities, relationships, db_name="ERP system") - - return True, "Graph loaded successfully" - - @staticmethod - def _parse_odata_schema(data) -> Tuple[dict, dict]: - """ - This function parses the OData schema and returns entities and relationships. - """ - entities = {} - relationships = {} - - root = ET.fromstring(data) - - # Define namespaces - namespaces = { - "edmx": "http://docs.oasis-open.org/odata/ns/edmx", - "edm": "http://docs.oasis-open.org/odata/ns/edm", - } - - schema_element = root.find(".//edmx:DataServices/edm:Schema", namespaces) - if schema_element is None: - raise ET.ParseError("Schema element not found") - - entity_types = schema_element.findall("edm:EntityType", namespaces) - for entity_type in tqdm.tqdm(entity_types, "Parsing OData schema"): - entity_name = entity_type.get("Name") - entities[entity_name] = {"col_descriptions": []} - entities[entity_name]["columns"] = {} - for prop in entity_type.findall("edm:Property", namespaces): - prop_name = prop.get("Name") - try: - if prop_name is not None: - entities[entity_name]["columns"][prop_name] = {} - entities[entity_name]["columns"][prop_name]["type"] = prop.get( - "Type" - ).split(".")[-1] - col_des = entity_name - if len(prop.findall("edm:Annotation", namespaces)) > 0: - if 
len(prop.findall("edm:Annotation", namespaces)[0].get("String")) > 0: - col_des = prop.findall("edm:Annotation", namespaces)[0].get( - "String" - ) - entities[entity_name]["col_descriptions"].append(col_des) - entities[entity_name]["columns"][prop_name]["description"] = col_des - except Exception as e: - print(f"Error parsing property {prop_name} for entity {entity_name}") - continue - - # = {prop.get("Name"): prop.get("Type") \ - # for prop in entity_type.findall("edm:Property", namespaces)} - description = entity_type.findall("edm:Annotation", namespaces) - if len(description) > 0: - entities[entity_name]["description"] = ( - description[0].get("String").replace("'", "\\'") - ) - else: - try: - entities[entity_name]["description"] = ( - entity_name - + " with Primery key: " - + entity_type.find("edm:Key/edm:PropertyRef", namespaces).attrib["Name"] - ) - except: - print(f"Error parsing description for entity {entity_name}") - entities[entity_name]["description"] = entity_name - - for entity_type in tqdm.tqdm(entity_types, "Parsing OData schema - relationships"): - - entity_name = entity_type.attrib["Name"] - - for rel in entity_type.findall("edm:NavigationProperty", namespaces): - rel_name = rel.get("Name") - raw_type = rel.get("Type") # e.g., 'Collection(Priority.OData.ABILITYVALUES)' - - # Clean 'Collection(...)' wrapper if exists - if raw_type.startswith("Collection(") and raw_type.endswith(")"): - raw_type = raw_type[len("Collection(") : -1] - - # Extract the target entity name - match = re.search(r"(\w+)$", raw_type) - target_entity = match.group(1) if match else "UNKNOWN" - - source_entity = entity_name - target_entity = target_entity - source_fields = entities.get(entity_name, {})["columns"] - target_fields = entities.get(target_entity, {})["columns"] - - # TODO This usage is for demonstration purposes only, it should be \ - # replaced with a more robust method - source_col, target_col = guess_relationship_columns(source_fields, target_fields) - if 
source_col and target_col: - # Store the relationship - if rel_name not in relationships: - relationships[rel_name] = [] - # src_col, tgt_col = guess_relationship_columns(source_entity, \ - # target_entity, entities[source_entity], entities[target_entity]) - relationships[rel_name].append( - { - "from": source_entity, - "to": target_entity, - "source_column": source_col, - "target_column": target_col, - "note": ( - "inferred" if source_col and target_col else "implicit/subform" - ), - } - ) - - return entities, relationships - - -# TODO: this funtion is for demonstration purposes only, it should be \ -# replaced with a more robust method -def guess_relationship_columns(source_fields, target_fields): - for src_key, src_meta in source_fields.items(): - if src_key == "description": - continue - for tgt_key, tgt_meta in target_fields.items(): - if tgt_key == "description": - continue - # Heuristic: same type and similar name - if src_meta["type"] == tgt_meta["type"] and ( - src_key.lower() in tgt_key.lower() or tgt_key.lower() in src_key.lower() - ): - return src_key, tgt_key - return None, None diff --git a/api/loaders/postgres_loader.py b/api/loaders/postgres_loader.py index 0ac50e49..354a528f 100644 --- a/api/loaders/postgres_loader.py +++ b/api/loaders/postgres_loader.py @@ -1,32 +1,41 @@ """PostgreSQL loader for loading database schemas into FalkorDB graphs.""" +import re import datetime import decimal import logging -import re -from typing import Tuple, Dict, Any, List +from typing import AsyncGenerator, Tuple, Dict, Any, List import psycopg2 +from psycopg2 import sql import tqdm -from api.loaders.base_loader import BaseLoader -from api.loaders.graph_loader import load_to_graph +from api.loaders.base_loader import BaseLoader # pylint: disable=import-error +from api.loaders.graph_loader import load_to_graph # pylint: disable=import-error logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +class 
PostgreSQLQueryError(Exception): + """Exception raised when PostgreSQL query execution fails.""" + + +class PostgreSQLConnectionError(Exception): + """Exception raised when PostgreSQL connection fails.""" + + class PostgresLoader(BaseLoader): """ Loader for PostgreSQL databases that connects and extracts schema information. """ - # DDL operations that modify database schema + # DDL operations that modify database schema # pylint: disable=duplicate-code SCHEMA_MODIFYING_OPERATIONS = { 'CREATE', 'ALTER', 'DROP', 'RENAME', 'TRUNCATE' } # More specific patterns for schema-affecting operations - SCHEMA_PATTERNS = [ + SCHEMA_PATTERNS = [ # pylint: disable=duplicate-code r'^\s*CREATE\s+TABLE', r'^\s*CREATE\s+INDEX', r'^\s*CREATE\s+UNIQUE\s+INDEX', @@ -41,6 +50,39 @@ class PostgresLoader(BaseLoader): r'^\s*DROP\s+SCHEMA', ] + @staticmethod + def _execute_count_query(cursor, table_name: str, col_name: str) -> Tuple[int, int]: + """ + Execute query to get total count and distinct count for a column. + PostgreSQL implementation returning counts from tuple-style results. + """ + query = sql.SQL(""" + SELECT COUNT(*) AS total_count, + COUNT(DISTINCT {col}) AS distinct_count + FROM {table}; + """).format( + col=sql.Identifier(col_name), + table=sql.Identifier(table_name) + ) + cursor.execute(query) + output = cursor.fetchall() + first_result = output[0] + return first_result[0], first_result[1] + + @staticmethod + def _execute_distinct_query(cursor, table_name: str, col_name: str) -> List[Any]: + """ + Execute query to get distinct values for a column. + PostgreSQL implementation handling tuple-style results. 
+ """ + query = sql.SQL("SELECT DISTINCT {col} FROM {table};").format( + col=sql.Identifier(col_name), + table=sql.Identifier(table_name) + ) + cursor.execute(query) + distinct_results = cursor.fetchall() + return [row[0] for row in distinct_results if row[0] is not None] + @staticmethod def _serialize_value(value): """ @@ -58,13 +100,12 @@ def _serialize_value(value): return value.isoformat() if isinstance(value, decimal.Decimal): return float(value) - elif value is None: + if value is None: return None - else: - return value + return value @staticmethod - async def load(prefix: str, connection_url: str) -> Tuple[bool, str]: + async def load(prefix: str, connection_url: str) -> AsyncGenerator[tuple[bool, str], None]: """ Load the graph data from a PostgreSQL database into the graph database. @@ -86,8 +127,10 @@ async def load(prefix: str, connection_url: str) -> Tuple[bool, str]: db_name = db_name.split('?')[0] # Get all table information + yield True, "Extracting table information..." entities = PostgresLoader.extract_tables_info(cursor) + yield True, "Extracting relationship information..." # Get all relationship information relationships = PostgresLoader.extract_relationships(cursor) @@ -95,17 +138,20 @@ async def load(prefix: str, connection_url: str) -> Tuple[bool, str]: cursor.close() conn.close() + yield True, "Loading data into graph..." # Load data into graph await load_to_graph(f"{prefix}_{db_name}", entities, relationships, db_name=db_name, db_url=connection_url) - return True, (f"PostgreSQL schema loaded successfully. " + yield True, (f"PostgreSQL schema loaded successfully. 
" f"Found {len(entities)} tables.") except psycopg2.Error as e: - return False, f"PostgreSQL connection error: {str(e)}" - except Exception as e: - return False, f"Error loading PostgreSQL schema: {str(e)}" + logging.error("PostgreSQL connection error: %s", e) + yield False, f"PostgreSQL connection error: {str(e)}" + except Exception as e: # pylint: disable=broad-exception-caught + logging.error("Error loading PostgreSQL schema: %s", e) + yield False, f"Error loading PostgreSQL schema: {str(e)}" @staticmethod def extract_tables_info(cursor) -> Dict[str, Any]: @@ -233,6 +279,12 @@ def extract_columns_info(cursor, table_name: str) -> Dict[str, Any]: if column_default: description_parts.append(f"(Default: {column_default})") + # Add distinct values if applicable + distinct_values_desc = PostgresLoader.extract_distinct_values_for_column( + cursor, table_name, col_name + ) + description_parts.extend(distinct_values_desc) + columns_info[col_name] = { 'type': data_type, 'null': is_nullable, @@ -241,6 +293,7 @@ def extract_columns_info(cursor, table_name: str) -> Dict[str, Any]: 'default': column_default } + return columns_info @staticmethod @@ -377,10 +430,10 @@ async def refresh_graph_schema(graph_id: str, db_url: str) -> Tuple[bool, str]: Tuple of (success, message) """ try: - logging.info("Schema modification detected. Refreshing graph schema for: %s", graph_id) + logging.info("Schema modification detected. 
Refreshing graph schema.") # Import here to avoid circular imports - from api.extensions import db + from api.extensions import db # pylint: disable=import-error,import-outside-toplevel # Clear existing graph data # Drop current graph before reloading @@ -403,10 +456,10 @@ async def refresh_graph_schema(graph_id: str, db_url: str) -> Tuple[bool, str]: logging.info("Graph schema refreshed successfully.") return True, message - logging.error("Schema refresh failed for graph %s: %s", graph_id, message) + logging.error("Schema refresh failed") return False, "Failed to reload schema" - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught # Log the error and return failure logging.error("Error refreshing graph schema: %s", str(e)) error_msg = "Error refreshing graph schema" @@ -481,11 +534,11 @@ def execute_sql_query(sql_query: str, db_url: str) -> List[Dict[str, Any]]: conn.rollback() cursor.close() conn.close() - raise Exception(f"PostgreSQL query execution error: {str(e)}") + raise PostgreSQLConnectionError(f"PostgreSQL query execution error: {str(e)}") from e except Exception as e: # Rollback in case of error if 'conn' in locals(): conn.rollback() cursor.close() conn.close() - raise Exception(f"Error executing SQL query: {str(e)}") + raise PostgreSQLQueryError(f"Error executing SQL query: {str(e)}") from e diff --git a/api/loaders/schema_validator.py b/api/loaders/schema_validator.py deleted file mode 100644 index 5ecc2d78..00000000 --- a/api/loaders/schema_validator.py +++ /dev/null @@ -1,101 +0,0 @@ -"""Schema validation module for table schemas.""" - -REQUIRED_COLUMN_KEYS = {"description", "type", "null", "key", "default"} -VALID_NULL_VALUES = {"YES", "NO"} - - -def validate_table_schema(schema): - """ - Validate a table schema structure. 
- - Args: - schema (dict): The schema dictionary to validate - - Returns: - list: List of validation errors found - """ - errors = [] - - # Validate top-level database key - if "database" not in schema or not isinstance(schema["database"], str): - errors.append("Missing or invalid 'database' key") - - # Validate tables key - if "tables" not in schema or not isinstance(schema["tables"], dict): - errors.append("Missing or invalid 'tables' key") - return errors - - for table_name, table_data in schema["tables"].items(): - errors.extend(_validate_table(table_name, table_data)) - - return errors - - -def _validate_table(table_name, table_data): - """Validate a single table's structure.""" - errors = [] - - if not table_data.get("description"): - errors.append(f"Table '{table_name}' is missing a description") - - if "columns" not in table_data or not isinstance(table_data["columns"], dict): - errors.append(f"Table '{table_name}' has no valid 'columns' definition") - return errors - - for column_name, column_data in table_data["columns"].items(): - errors.extend(_validate_column(table_name, column_name, column_data)) - - # Optional: validate foreign keys - if "foreign_keys" in table_data: - errors.extend(_validate_foreign_keys(table_name, table_data["foreign_keys"])) - - return errors - - -def _validate_column(table_name, column_name, column_data): - """Validate a single column's structure.""" - errors = [] - - # Check for missing required keys - missing_keys = REQUIRED_COLUMN_KEYS - column_data.keys() - if missing_keys: - errors.append( - f"Column '{column_name}' in table '{table_name}' " - f"is missing keys: {missing_keys}" - ) - return errors - - # Validate non-empty description - if not column_data.get("description"): - errors.append( - f"Column '{column_name}' in table '{table_name}' has an empty description" - ) - - # Validate 'null' field - if column_data["null"] not in VALID_NULL_VALUES: - errors.append( - f"Column '{column_name}' in table '{table_name}' " - f"has 
invalid 'null' value: {column_data['null']}" - ) - - return errors - - -def _validate_foreign_keys(table_name, foreign_keys): - """Validate foreign keys structure.""" - errors = [] - - if not isinstance(foreign_keys, dict): - errors.append( - f"Foreign keys for table '{table_name}' must be a dictionary" - ) - return errors - - for fk_name, fk_data in foreign_keys.items(): - for key in ("column", "referenced_table", "referenced_column"): - if key not in fk_data or not fk_data[key]: - errors.append( - f"Foreign key '{fk_name}' in table '{table_name}' is missing '{key}'" - ) - - return errors diff --git a/api/memory/__init__.py b/api/memory/__init__.py index 264d0421..6c8e9275 100644 --- a/api/memory/__init__.py +++ b/api/memory/__init__.py @@ -4,4 +4,4 @@ from .graphiti_tool import MemoryTool -__all__ = ["MemoryTool"] \ No newline at end of file +__all__ = ["MemoryTool"] diff --git a/api/memory/graphiti_tool.py b/api/memory/graphiti_tool.py index f5c1bf53..2b23d29e 100644 --- a/api/memory/graphiti_tool.py +++ b/api/memory/graphiti_tool.py @@ -2,7 +2,7 @@ Graphiti integration for QueryWeaver memory component. Saves summarized conversations with user and database nodes. 
""" - +# pylint: disable=all import asyncio import os from typing import List, Dict, Any, Optional @@ -40,7 +40,6 @@ def __init__(self, user_id: str, graph_id: str): self.user_id = user_id self.graph_id = graph_id - self.config = Config() @classmethod @@ -463,17 +462,17 @@ async def search_memories(self, query: str, user_limit: int = 5, database_limit: # Add similar queries context if similar_queries: memory_context += "SIMILAR QUERIES HISTORY:\n" - + # Separate successful and failed queries successful_queries = [q for q in similar_queries if q.get('success', False)] failed_queries = [q for q in similar_queries if not q.get('success', False)] - + if successful_queries: memory_context += "\nSUCCESSFUL QUERIES (Learn from these patterns):\n" for i, query_data in enumerate(successful_queries, 1): memory_context += f"{i}. Query: \"{query_data.get('user_query', '')}\"\n" memory_context += f" Successful SQL: {query_data.get('sql_query', '')}\n\n" - + if failed_queries: memory_context += "FAILED QUERIES (Avoid these patterns):\n" for i, query_data in enumerate(failed_queries, 1): @@ -484,9 +483,9 @@ async def search_memories(self, query: str, user_limit: int = 5, database_limit: memory_context += f" AVOID this approach.\n\n" memory_context += "\n" - + return memory_context - + except Exception as e: print(f"Error in concurrent memory search: {e}") return "" @@ -534,12 +533,12 @@ async def summarize_conversation(self, conversation: Dict[str, Any]) -> Dict[str conv_text += f"Error: {conversation['error']}\n" if conversation.get('answer'): conv_text += f"Assistant: {conversation['answer']}\n" - + # Add success/failure status success_status = conversation.get('success', True) conv_text += f"Execution Status: {'Success' if success_status else 'Failed'}\n" conv_text += "\n" - + prompt = f""" Analyze this QueryWeaver question-answer interaction with database "{self.graph_id}". 
Focus exclusively on extracting graph-oriented facts about the database and its entities, relationships, and structure. @@ -570,7 +569,7 @@ async def summarize_conversation(self, conversation: Dict[str, Any]) -> Dict[str try: response = completion( - model=self.config.COMPLETION_MODEL, + model=Config.COMPLETION_MODEL, messages=[{"role": "user", "content": prompt}], temperature=0.1 ) @@ -638,28 +637,33 @@ def get_azure_openai_clients(): def create_graphiti_client(falkor_driver: FalkorDriver) -> Graphiti: """Create a Graphiti client configured with Azure OpenAI.""" - # Get Azure OpenAI clients and config - llm_client_azure, embedding_client_azure, config = get_azure_openai_clients() - - # Create LLM Config with Azure deployment names - azure_llm_config = LLMConfig( - small_model=config.small_model_deployment, - model=config.llm_deployment, - ) - # Initialize Graphiti with Azure OpenAI clients - return Graphiti( - graph_driver=falkor_driver, - llm_client=OpenAIClient(config=azure_llm_config, client=llm_client_azure), - embedder=OpenAIEmbedder( - config=OpenAIEmbedderConfig(embedding_model=config.embedding_deployment), - client=embedding_client_azure, - ), - cross_encoder=OpenAIRerankerClient( - config=LLMConfig( - model=azure_llm_config.small_model # Use small model for reranking + if Config.AZURE_FLAG: + # Get Azure OpenAI clients and config + llm_client_azure, embedding_client_azure, config = get_azure_openai_clients() + + # Create LLM Config with Azure deployment names + azure_llm_config = LLMConfig( + small_model=config.small_model_deployment, + model=config.llm_deployment, + ) + + graphiti_client = Graphiti( + graph_driver=falkor_driver, + llm_client=OpenAIClient(config=azure_llm_config, client=llm_client_azure), + embedder=OpenAIEmbedder( + config=OpenAIEmbedderConfig(embedding_model=config.embedding_deployment), + client=embedding_client_azure, ), - client=llm_client_azure, - ), - ) + cross_encoder=OpenAIRerankerClient( + config=LLMConfig( + 
model=azure_llm_config.small_model # Use small model for reranking + ), + client=llm_client_azure, + ), + ) + else: # Fallback to default OpenAI config + graphiti_client = Graphiti(graph_driver=falkor_driver) + + return graphiti_client diff --git a/api/routes/__init__.py b/api/routes/__init__.py index c8d3ba5c..05d84c9c 100644 --- a/api/routes/__init__.py +++ b/api/routes/__init__.py @@ -1,3 +1,5 @@ +"""Routes module for text2sql API.""" + # Routes module for text2sql API from .auth import auth_router diff --git a/api/routes/auth.py b/api/routes/auth.py index f48a87b7..399395ae 100644 --- a/api/routes/auth.py +++ b/api/routes/auth.py @@ -1,19 +1,29 @@ """Authentication routes for the text2sql API.""" +# pylint: disable=all +import hashlib +import hmac import logging import os +import re +import time import secrets + from pathlib import Path from urllib.parse import urljoin +from authlib.integrations.starlette_client import OAuth + from fastapi import APIRouter, Request, HTTPException, status -from fastapi.responses import RedirectResponse, HTMLResponse +from fastapi.responses import RedirectResponse, HTMLResponse, JSONResponse from fastapi.templating import Jinja2Templates -from authlib.integrations.starlette_client import OAuth from jinja2 import Environment, FileSystemLoader, FileSystemBytecodeCache, select_autoescape from starlette.config import Config +from pydantic import BaseModel + +from api.auth.user_management import delete_user_token, ensure_user_in_organizations, validate_user +from api.extensions import db -from api.auth.user_management import delete_user_token, validate_user # Router auth_router = APIRouter() @@ -36,6 +46,314 @@ templates.env.globals["google_tag_manager_id"] = os.getenv("GOOGLE_TAG_MANAGER_ID") +GOOGLE_AUTH = bool(os.getenv("GOOGLE_CLIENT_ID") and os.getenv("GOOGLE_CLIENT_SECRET")) +GITHUB_AUTH = bool(os.getenv("GITHUB_CLIENT_ID") and os.getenv("GITHUB_CLIENT_SECRET")) +EMAIL_AUTH = bool(os.getenv("EMAIL_AUTH_ENABLED", "").lower() in 
["true", "1", "yes", "on"]) + +# ---- Authentication Configuration Helpers ---- +def _is_email_auth_enabled() -> bool: + """Check if email authentication is enabled via environment variable.""" + return EMAIL_AUTH or not (GOOGLE_AUTH or GITHUB_AUTH) + +def _is_google_auth_enabled() -> bool: + """Check if Google OAuth is enabled via environment variables.""" + return GOOGLE_AUTH + +def _is_github_auth_enabled() -> bool: + """Check if GitHub OAuth is enabled via environment variables.""" + return GITHUB_AUTH + +def _get_auth_config() -> dict: + """Get authentication configuration for templates.""" + return { + "email_auth_enabled": _is_email_auth_enabled(), + "google_auth_enabled": _is_google_auth_enabled(), + "github_auth_enabled": _is_github_auth_enabled(), + } + +# Data models for email authentication +class EmailLoginRequest(BaseModel): + """_summary_ + + Args: + BaseModel (_type_): _description_ + """ + email: str + password: str + +class EmailSignupRequest(BaseModel): + """_summary_ + + Args: + BaseModel (_type_): _description_ + """ + firstName: str + lastName: str + email: str + password: str + +# ---- Password utilities ---- +def _hash_password(password: str) -> str: + """Hash a password using PBKDF2 with a random salt.""" + salt = os.urandom(32) + password_hash = hashlib.pbkdf2_hmac('sha256', password.encode('utf-8'), salt, 100000) + return (salt + password_hash).hex() + +def _verify_password(password: str, stored_password_hex: str) -> bool: + """Verify a password against its hash using constant-time comparison.""" + try: + stored_password = bytes.fromhex(stored_password_hex) + salt = stored_password[:32] + stored_hash = stored_password[32:] + + password_hash = hashlib.pbkdf2_hmac('sha256', password.encode('utf-8'), salt, 100000) + + return hmac.compare_digest(password_hash, stored_hash) + except (ValueError, TypeError): + return False + +def _sanitize_for_log(value: str) -> str: + """Sanitize user input for logging by removing newlines and carriage 
returns.""" + if not isinstance(value, str): + return str(value) + return value.replace('\r\n', '').replace('\n', '').replace('\r', '') + +def _validate_email(email: str) -> bool: + """Basic email validation.""" + pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + return re.match(pattern, email) is not None + +async def _set_mail_hash(email: str, password_hash: str) -> bool: + """Set email hash for the user in the database.""" + try: + organizations_graph = db.select_graph("Organizations") + + # Sanitize inputs for logging + safe_email = _sanitize_for_log(email) + + # Create new email identity and user + create_query = """ + MERGE (i:Identity { + provider_user_id: $email, + email: $email + }) + SET i.password_hash = $password_hash + RETURN i + """ + + result = await organizations_graph.query(create_query, { + "email": email, + "password_hash": password_hash, + }) + + if result.result_set: + return True + else: + logging.error("Failed to set email hash for user: %s", safe_email) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error" + ) + + except Exception as e: + logging.error("Error setting email hash for user %s: %s", safe_email, e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error" + ) + +async def _authenticate_email_user(email: str, password: str): + """Authenticate an email user.""" + try: + organizations_graph = db.select_graph("Organizations") + + # Find user by email + query = """ + MATCH (i:Identity {provider: 'email', email: $email})-[:AUTHENTICATES]->(u:User) + RETURN i, u + """ + + result = await organizations_graph.query(query, {"email": email}) + + if not result.result_set: + return False, "Invalid email or password" + + identity = result.result_set[0][0] + user = result.result_set[0][1] + + # Verify password - access Node properties correctly + stored_password_hash = identity.properties.get('password_hash') + if not 
stored_password_hash or not _verify_password(password, stored_password_hash):
+            return False, "Invalid email or password"
+
+        # Update last login
+        update_query = """
+            MATCH (i:Identity {provider: 'email', email: $email})
+            SET i.last_login = timestamp()
+        """
+        await organizations_graph.query(update_query, {"email": email})
+
+        logging.info("EMAIL USER AUTHENTICATED: email=%r", _sanitize_for_log(email))
+        return True, {"identity": identity, "user": user}
+
+    except Exception as e:
+        logging.error("Error authenticating email user: %s", e)
+        return False, "Internal error"
+
+# ---- Email Authentication Routes ----
+@auth_router.post("/signup/email")
+async def email_signup(request: Request, signup_data: EmailSignupRequest) -> JSONResponse:
+    """Handle email/password user registration."""
+    try:
+        # Check if email authentication is enabled
+        if os.getenv("EMAIL_AUTH_ENABLED", "").lower() not in ["true", "1", "yes", "on"]:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Email authentication is not enabled"
+            )
+
+        # Validate required fields
+        if not all([signup_data.firstName, signup_data.lastName,
+                    signup_data.email, signup_data.password]):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="All fields are required"
+            )
+
+        first_name = signup_data.firstName.strip()
+        last_name = signup_data.lastName.strip()
+        email = signup_data.email.strip().lower()
+        password = signup_data.password
+
+        # Validate email format
+        if not _validate_email(email):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Invalid email format"
+            )
+
+        # Validate password strength
+        if len(password) < 8:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Password must be at least 8 characters long"
+            )
+
+        api_token = secrets.token_urlsafe(32)
+        # Create organization association
+        success, user_info = await ensure_user_in_organizations(email, email,
+                                                                f"{first_name} {last_name}", "email", 
api_token) + + if success and user_info and user_info["new_identity"]: + logging.info("New user created: %s", _sanitize_for_log(email)) + + # Hash password + password_hash = _hash_password(password) + + # Set email hash + await _set_mail_hash(email, password_hash) + + else: + logging.info("User already exists: %s", _sanitize_for_log(email)) + + logging.info("User registration successful: %s", _sanitize_for_log(email)) + + response = JSONResponse({ + "success": True, + }, status_code=201) + response.set_cookie( + key="api_token", + value=api_token, + httponly=True, + secure=True + ) + return response + + except Exception as e: + logging.error("Signup error: %s", e) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Registration failed" + ) + +@auth_router.post("/login/email") +async def email_login(request: Request, login_data: EmailLoginRequest) -> JSONResponse: + """Handle email/password user login.""" + try: + # Check if email authentication is enabled + if os.getenv("EMAIL_AUTH_ENABLED", "").lower() not in ["true", "1", "yes", "on"]: + return JSONResponse( + {"success": False, "error": "Email authentication is not enabled"}, + status_code=status.HTTP_403_FORBIDDEN + ) + + # Validate required fields + if not login_data.email or not login_data.password: + return JSONResponse( + {"success": False, "error": "Email and password are required"}, + status_code=status.HTTP_400_BAD_REQUEST + ) + + email = login_data.email.strip().lower() + password = login_data.password + + # Validate email format + if not _validate_email(email): + return JSONResponse( + {"success": False, "error": "Invalid email format"}, + status_code=status.HTTP_400_BAD_REQUEST + ) + + # Authenticate user + success, result = await _authenticate_email_user(email, password) + + if not success: + return JSONResponse( + {"success": False, "error": result}, + status_code=status.HTTP_401_UNAUTHORIZED + ) + + # Set session data - result is a dict when success is True + if 
isinstance(result, dict): + user_node = result.get("user") + identity_node = result.get("identity") + + # Access node properties correctly + user_props = ( + user_node.properties + if user_node and hasattr(user_node, "properties") + else {} + ) + identity_props = ( + identity_node.properties + if identity_node and hasattr(identity_node, "properties") + else {} + ) + + request.session["user_info"] = { + "id": identity_props.get("provider_user_id", email), + "name": user_props.get("name", ""), + "email": user_props.get("email", email), + "picture": user_props.get("picture", ""), + "provider": "email", + } + request.session["email_authenticated"] = True + request.session["token_validated_at"] = time.time() + + return JSONResponse({"success": True, "message": "Login successful"}) + else: + return JSONResponse( + {"success": False, "error": "Authentication failed"}, + status_code=status.HTTP_401_UNAUTHORIZED + ) + + except Exception as e: + logging.error("Login error: %s", e) + return JSONResponse( + {"success": False, "error": "Login failed"}, + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR + ) + # ---- Helpers ---- def _get_provider_client(request: Request, provider: str): """Get an OAuth provider client from app.state.oauth""" @@ -48,24 +366,6 @@ def _get_provider_client(request: Request, provider: str): raise HTTPException(status_code=500, detail=f"OAuth provider {provider} not configured") return client -@auth_router.get("/chat", name="auth.chat", response_class=HTMLResponse) -async def chat(request: Request) -> HTMLResponse: - """Explicit chat route (renders main chat UI).""" - user_info, is_authenticated = await validate_user(request) - - if not is_authenticated or not user_info: - is_authenticated = False - user_info = None - - return templates.TemplateResponse( - "chat.j2", - { - "request": request, - "is_authenticated": is_authenticated, - "user_info": user_info, - }, - ) - def _build_callback_url(request: Request, path: str) -> str: """Build absolute 
callback URL, honoring OAUTH_BASE_URL if provided.""" base_override = os.getenv("OAUTH_BASE_URL") @@ -77,37 +377,23 @@ def _build_callback_url(request: Request, path: str) -> str: # ---- Routes ---- @auth_router.get("/", response_class=HTMLResponse) async def home(request: Request) -> HTMLResponse: - """Handle the home page, rendering the landing page for unauthenticated users and the chat page for authenticated users.""" + """ + Handle the home page, rendering the landing page for unauthenticated users + and the chat page for authenticated users. + """ user_info, is_authenticated_flag = await validate_user(request) - - if is_authenticated_flag or user_info: - return templates.TemplateResponse( - "chat.j2", - { - "request": request, - "is_authenticated": True, - "user_info": user_info - } - ) + auth_config = _get_auth_config() return templates.TemplateResponse( - "landing.j2", + "chat.j2", { - "request": request, - "is_authenticated": False, - "user_info": None + "request": request, + "is_authenticated": is_authenticated_flag, + "user_info": user_info, + **auth_config, } ) - - - - -@auth_router.get("/login", response_class=RedirectResponse) -async def login_page(_: Request) -> RedirectResponse: - return RedirectResponse(url="/login/google", status_code=status.HTTP_302_FOUND) - - @auth_router.get("/login/google", name="google.login", response_class=RedirectResponse) async def login_google(request: Request) -> RedirectResponse: """Initiate Google OAuth login flow. @@ -119,6 +405,13 @@ async def login_google(request: Request) -> RedirectResponse: RedirectResponse: The redirect response to the Google OAuth endpoint. 
""" + # Check if Google auth is enabled + if not _is_google_auth_enabled(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Google authentication is not configured" + ) + google = _get_provider_client(request, "google") redirect_uri = _build_callback_url(request, "login/google/authorized") @@ -135,7 +428,8 @@ async def login_google(request: Request) -> RedirectResponse: @auth_router.get("/login/google/authorized", response_class=RedirectResponse) async def google_authorized(request: Request) -> RedirectResponse: - """Handle Google OAuth callback and user authorization. + """ + Handle Google OAuth callback and user authorization. Args: request (Request): The incoming request. @@ -143,11 +437,22 @@ async def google_authorized(request: Request) -> RedirectResponse: Returns: RedirectResponse: The redirect response after handling the callback. """ + # Check if Google auth is enabled + if not _is_google_auth_enabled(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Google authentication is not configured" + ) try: google = _get_provider_client(request, "google") token = await google.authorize_access_token(request) - user_info = token.get("userinfo") + resp = await google.get("userinfo", token=token) + if resp.status_code != 200: + logging.warning("Failed to retrieve user info from Google") + raise HTTPException(status_code=400, detail="Failed to get user info from Google") + + user_info = resp.json() if user_info: user_data = { @@ -165,7 +470,7 @@ async def google_authorized(request: Request) -> RedirectResponse: # Call the registered handler (await if async) await handler('google', user_data, api_token) - redirect = RedirectResponse(url="/chat", status_code=302) + redirect = RedirectResponse(url="/", status_code=302) redirect.set_cookie( key="api_token", value=api_token, @@ -184,12 +489,13 @@ async def google_authorized(request: Request) -> RedirectResponse: raise HTTPException(status_code=400, detail="Failed to get 
user info from Google") except Exception as e: - logging.error(f"Google OAuth authentication failed: {str(e)}") - raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}") + logging.error("Google OAuth authentication failed: %s", str(e)) # nosemgrep + raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}") from e @auth_router.get("/login/google/callback", response_class=RedirectResponse) async def google_callback_compat(request: Request) -> RedirectResponse: + """Handle Google OAuth callback redirect for compatibility.""" qs = f"?{request.url.query}" if request.url.query else "" redirect = f"/login/google/authorized{qs}" return RedirectResponse(url=redirect, status_code=status.HTTP_307_TEMPORARY_REDIRECT) @@ -197,6 +503,14 @@ async def google_callback_compat(request: Request) -> RedirectResponse: @auth_router.get("/login/github", name="github.login", response_class=RedirectResponse) async def login_github(request: Request) -> RedirectResponse: + """Handle GitHub OAuth login redirect.""" + # Check if GitHub auth is enabled + if not _is_github_auth_enabled(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="GitHub authentication is not configured" + ) + github = _get_provider_client(request, "github") redirect_uri = _build_callback_url(request, "login/github/authorized") @@ -213,6 +527,13 @@ async def login_github(request: Request) -> RedirectResponse: @auth_router.get("/login/github/authorized", response_class=RedirectResponse) async def github_authorized(request: Request) -> RedirectResponse: + """Handle GitHub OAuth authorization callback.""" + # Check if GitHub auth is enabled + if not _is_github_auth_enabled(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="GitHub authentication is not configured" + ) try: github = _get_provider_client(request, "github") token = await github.authorize_access_token(request) @@ -220,7 +541,7 @@ async def github_authorized(request: 
Request) -> RedirectResponse: # Fetch GitHub user info resp = await github.get("user", token=token) if resp.status_code != 200: - logging.error("Failed to fetch GitHub user info: %s", resp.text) + logging.error("Failed to fetch GitHub user info: %s", resp.text) # nosemgrep return RedirectResponse(url="/", status_code=status.HTTP_302_FOUND) user_info = resp.json() @@ -253,7 +574,7 @@ async def github_authorized(request: Request) -> RedirectResponse: # Call the registered handler (await if async) await handler('github', user_data, api_token) - redirect = RedirectResponse(url="/chat", status_code=302) + redirect = RedirectResponse(url="/", status_code=302) redirect.set_cookie( key="api_token", value=api_token, @@ -272,12 +593,13 @@ async def github_authorized(request: Request) -> RedirectResponse: raise HTTPException(status_code=400, detail="Failed to get user info from Github") except Exception as e: - logging.error(f"GitHub OAuth authentication failed: {str(e)}") - raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}") + logging.error("GitHub OAuth authentication failed: %s", str(e)) # nosemgrep + raise HTTPException(status_code=400, detail=f"Authentication failed: {str(e)}") from e @auth_router.get("/login/github/callback", response_class=RedirectResponse) async def github_callback_compat(request: Request) -> RedirectResponse: + """Handle GitHub OAuth callback redirect for compatibility.""" qs = f"?{request.url.query}" if request.url.query else "" redirect = f"/login/github/authorized{qs}" return RedirectResponse(url=redirect, status_code=status.HTTP_307_TEMPORARY_REDIRECT) @@ -302,32 +624,33 @@ def init_auth(app): config = Config(environ=os.environ) oauth = OAuth(config) - google_client_id = os.getenv("GOOGLE_CLIENT_ID") - google_client_secret = os.getenv("GOOGLE_CLIENT_SECRET") - if not google_client_id or not google_client_secret: - logging.warning("Google OAuth env vars not set; login will fail until configured.") - - oauth.register( - 
name="google", - client_id=google_client_id, - client_secret=google_client_secret, - server_metadata_url="https://accounts.google.com/.well-known/openid-configuration", - client_kwargs={"scope": "openid email profile"}, - ) - - github_client_id = os.getenv("GITHUB_CLIENT_ID") - github_client_secret = os.getenv("GITHUB_CLIENT_SECRET") - if not github_client_id or not github_client_secret: - logging.warning("GitHub OAuth env vars not set; login will fail until configured.") - - oauth.register( - name="github", - client_id=github_client_id, - client_secret=github_client_secret, - access_token_url="https://github.com/login/oauth/access_token", - authorize_url="https://github.com/login/oauth/authorize", - api_base_url="https://api.github.com/", - client_kwargs={"scope": "user:email"}, - ) + # Only register Google OAuth if credentials are available + if _is_google_auth_enabled(): + oauth.register( + name="google", + client_id=os.getenv("GOOGLE_CLIENT_ID"), + client_secret=os.getenv("GOOGLE_CLIENT_SECRET"), + server_metadata_url="https://accounts.google.com/.well-known/openid-configuration", + api_base_url="https://openidconnect.googleapis.com/v1/", + client_kwargs={"scope": "openid email profile"}, + ) + logging.info("Google OAuth initialized successfully") + else: + logging.info("Google OAuth not configured - skipping registration") + + # Only register GitHub OAuth if credentials are available + if _is_github_auth_enabled(): + oauth.register( + name="github", + client_id=os.getenv("GITHUB_CLIENT_ID"), + client_secret=os.getenv("GITHUB_CLIENT_SECRET"), + access_token_url="https://github.com/login/oauth/access_token", + authorize_url="https://github.com/login/oauth/authorize", + api_base_url="https://api.github.com/", + client_kwargs={"scope": "user:email"}, + ) + logging.info("GitHub OAuth initialized successfully") + else: + logging.info("GitHub OAuth not configured - skipping registration") app.state.oauth = oauth diff --git a/api/routes/database.py 
b/api/routes/database.py index 9e8f61e2..182b8d8e 100644 --- a/api/routes/database.py +++ b/api/routes/database.py @@ -1,8 +1,11 @@ """Database connection routes for the text2sql API.""" + import logging +import json +import time from fastapi import APIRouter, Request, HTTPException -from fastapi.responses import JSONResponse +from fastapi.responses import StreamingResponse from pydantic import BaseModel from api.auth.user_management import token_required @@ -11,6 +14,8 @@ database_router = APIRouter() +# Use the same delimiter as in the JavaScript frontend for streaming chunks +MESSAGE_DELIMITER = "|||FALKORDB_MESSAGE_BOUNDARY|||" class DatabaseConnectionRequest(BaseModel): """Database connection request model. @@ -18,16 +23,16 @@ class DatabaseConnectionRequest(BaseModel): Args: BaseModel (_type_): _description_ """ - url: str + url: str -@database_router.post("/database") +@database_router.post("/database", operation_id="connect_database") @token_required async def connect_database(request: Request, db_request: DatabaseConnectionRequest): """ Accepts a JSON payload with a database URL and attempts to connect. Supports both PostgreSQL and MySQL databases. - Returns success or error message. + Streams progress steps as a sequence of JSON messages separated by MESSAGE_DELIMITER. 
""" url = db_request.url if not url: @@ -37,52 +42,94 @@ async def connect_database(request: Request, db_request: DatabaseConnectionReque if not isinstance(url, str) or len(url.strip()) == 0: raise HTTPException(status_code=400, detail="Invalid URL format") - try: - success = False - result = "" - - # Check for PostgreSQL URL - if url.startswith("postgres://") or url.startswith("postgresql://"): + async def generate(): + overall_start = time.perf_counter() + steps_counter = 0 + try: + # Step 1: Start + steps_counter += 1 + yield json.dumps( + { + "type": "reasoning_step", + "message": f"Step {steps_counter}: Starting database connection", + } + ) + MESSAGE_DELIMITER + + # Step 2: Determine type + db_type = None + if url.startswith("postgres://") or url.startswith("postgresql://"): + db_type = "postgresql" + loader = PostgresLoader + elif url.startswith("mysql://"): + db_type = "mysql" + loader = MySQLLoader + else: + yield json.dumps( + {"type": "error", "message": "Invalid database URL format"} + ) + MESSAGE_DELIMITER + return + + steps_counter += 1 + yield json.dumps( + { + "type": "reasoning_step", + "message": f"Step {steps_counter}: Detected database type: {db_type}. 
" + "Attempting to load schema...", + } + ) + MESSAGE_DELIMITER + + # Step 3: Attempt to load schema using the loader + success, result = [False, ""] try: - # Attempt to connect/load using the PostgreSQL loader - success, result = await PostgresLoader.load(request.state.user_id, url) - except (ValueError, ConnectionError) as e: - logging.error("PostgreSQL connection error: %s", str(e)) - raise HTTPException( - status_code=500, - detail="Failed to connect to PostgreSQL database", + load_start = time.perf_counter() + async for progress in loader.load(request.state.user_id, url): + success, result = progress + if success: + steps_counter += 1 + yield json.dumps( + { + "type": "reasoning_step", + "message": f"Step {steps_counter}: {result}", + } + ) + MESSAGE_DELIMITER + else: + break + + load_elapsed = time.perf_counter() - load_start + logging.info( + "Database load attempt finished in %.2f seconds", load_elapsed ) - # Check for MySQL URL - elif url.startswith("mysql://"): - try: - # Attempt to connect/load using the MySQL loader - success, result = await MySQLLoader.load(request.state.user_id, url) - except (ValueError, ConnectionError) as e: - logging.error("MySQL connection error: %s", str(e)) - raise HTTPException( - status_code=500, detail="Failed to connect to MySQL database" - ) - - else: - raise HTTPException( - status_code=400, - detail=( - "Invalid database URL. 
Supported formats: postgresql:// " - "or mysql://" - ), + if success: + yield json.dumps( + { + "type": "final_result", + "success": True, + "message": "Database connected and schema loaded successfully", + } + ) + MESSAGE_DELIMITER + else: + # Don't stream the full internal result; give higher-level error + logging.error("Database loader failed: %s", str(result)) # nosemgrep + yield json.dumps( + {"type": "error", "message": "Failed to load database schema"} + ) + MESSAGE_DELIMITER + except Exception as e: # pylint: disable=broad-exception-caught + logging.exception("Error while loading database schema: %s", str(e)) + yield json.dumps( + {"type": "error", "message": "Error connecting to database"} + ) + MESSAGE_DELIMITER + + except Exception as e: # pylint: disable=broad-exception-caught + logging.exception("Unexpected error in connect_database stream: %s", str(e)) + yield json.dumps( + {"type": "error", "message": "Internal server error"} + ) + MESSAGE_DELIMITER + finally: + overall_elapsed = time.perf_counter() - overall_start + logging.info( + "connect_database processing completed - Total time: %.2f seconds", + overall_elapsed, ) - if success: - return JSONResponse(content={ - "success": True, - "message": "Database connected successfully" - }) - - # Don't return detailed error messages to prevent information exposure - logging.error("Database loader failed: %s", result) - raise HTTPException(status_code=400, detail="Failed to load database schema") - - except (ValueError, TypeError) as e: - logging.error("Unexpected error in database connection: %s", str(e)) - raise HTTPException(status_code=500, detail="Internal server error") + return StreamingResponse(generate(), media_type="application/json") diff --git a/api/routes/graphs.py b/api/routes/graphs.py index 0aa3ad7d..4eede460 100644 --- a/api/routes/graphs.py +++ b/api/routes/graphs.py @@ -10,16 +10,14 @@ from pydantic import BaseModel from redis import ResponseError +from api.routes.database import 
connect_database, DatabaseConnectionRequest from api.agents import AnalysisAgent, RelevancyAgent, ResponseFormatterAgent, FollowUpAgent from api.auth.user_management import token_required from api.config import Config from api.extensions import db from api.graph import find, get_db_description -from api.loaders.csv_loader import CSVLoader -from api.loaders.json_loader import JSONLoader from api.loaders.postgres_loader import PostgresLoader from api.loaders.mysql_loader import MySQLLoader -from api.loaders.odata_loader import ODataLoader from api.memory.graphiti_tool import MemoryTool # Use the same delimiter as in the JavaScript @@ -43,9 +41,9 @@ class ChatRequest(BaseModel): Args: BaseModel (_type_): _description_ """ - chat: list - result: list = None - instructions: str = None + chat: list[str] + result: list[str] | None = None + instructions: str | None = None class ConfirmRequest(BaseModel): @@ -76,11 +74,11 @@ def get_database_type_and_loader(db_url: str): if db_url_lower.startswith('postgresql://') or db_url_lower.startswith('postgres://'): return 'postgresql', PostgresLoader - elif db_url_lower.startswith('mysql://'): + if db_url_lower.startswith('mysql://'): return 'mysql', MySQLLoader - else: - # Default to PostgresLoader for backward compatibility - return 'postgresql', PostgresLoader + + # Default to PostgresLoader for backward compatibility + return 'postgresql', PostgresLoader def sanitize_query(query: str) -> str: """Sanitize the query to prevent injection attacks.""" @@ -107,11 +105,11 @@ def _graph_name(request: Request, graph_id:str) -> str: return f"{request.state.user_id}_{graph_id}" -@graphs_router.get("") +@graphs_router.get("", operation_id="list_databases") @token_required async def list_graphs(request: Request): """ - This route is used to list all the graphs that are available in the database. + This route is used to list all the graphs (database names) that are available in the database.
""" user_id = request.state.user_id user_graphs = await db.list_graphs() @@ -120,20 +118,22 @@ async def list_graphs(request: Request): for graph in user_graphs if graph.startswith(f"{user_id}_")] return JSONResponse(content=filtered_graphs) - -@graphs_router.get("/{graph_id}/data") +@graphs_router.get("/{graph_id}/data", operation_id="database_schema") @token_required -async def get_graph_data(request: Request, graph_id: str): - """Return all nodes and edges for the specified graph (namespaced to the user). +async def get_graph_data(request: Request, graph_id: str): # pylint: disable=too-many-locals,too-many-branches + """Return all nodes and edges for the specified database schema (namespaced to the user). This endpoint returns a JSON object with two keys: `nodes` and `edges`. Nodes contain a minimal set of properties (id, name, labels, props). Edges contain source and target node names (or internal ids), type and props. + + args: + graph_id (str): The ID of the graph to query (the database name). 
""" namespaced = _graph_name(request, graph_id) try: graph = db.select_graph(namespaced) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logging.error("Failed to select graph %s: %s", sanitize_log_input(namespaced), e) return JSONResponse(content={"error": "Graph not found or database error"}, status_code=404) @@ -154,7 +154,7 @@ async def get_graph_data(request: Request, graph_id: str): try: tables_res = (await graph.query(tables_query)).result_set links_res = (await graph.query(links_query)).result_set - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logging.error("Error querying graph data for %s: %s", sanitize_log_input(namespaced), e) return JSONResponse(content={"error": "Failed to read graph data"}, status_code=500) @@ -162,7 +162,7 @@ async def get_graph_data(request: Request, graph_id: str): for row in tables_res: try: table_name, columns = row - except Exception: + except Exception: # pylint: disable=broad-exception-caught continue # Normalize columns: ensure a list of dicts with name/type if not isinstance(columns, list): @@ -190,7 +190,7 @@ async def get_graph_data(request: Request, graph_id: str): continue normalized.append({"name": name, "type": ctype}) - except Exception: + except Exception: # pylint: disable=broad-exception-caught continue nodes.append({ @@ -204,7 +204,7 @@ async def get_graph_data(request: Request, graph_id: str): for row in links_res: try: source, target = row - except Exception: + except Exception: # pylint: disable=broad-exception-caught continue key = (source, target) if key in seen: @@ -217,7 +217,7 @@ async def get_graph_data(request: Request, graph_id: str): @graphs_router.post("") @token_required -async def load_graph(request: Request, data: GraphData = None, file: UploadFile = File(None)): +async def load_graph(request: Request, data: GraphData = None, file: UploadFile = File(None)): # pylint: disable=unused-argument """ This route is used 
to load the graph data into the database. It expects either: @@ -225,62 +225,39 @@ async def load_graph(request: Request, data: GraphData = None, file: UploadFile - A File upload (multipart/form-data) - An XML payload (application/xml or text/xml) """ - success, result = False, "Invalid content type" - graph_id = "" # ✅ Handle JSON Payload - if data: - if not hasattr(data, 'database') or not data.database: - raise HTTPException(status_code=400, detail="Invalid JSON data") - - graph_id = f"{request.state.user_id}_{data.database}" - success, result = await JSONLoader.load(graph_id, data.dict()) - + if data: # pylint: disable=no-else-raise + raise HTTPException(status_code=501, detail="JSONLoader is not implemented yet") # ✅ Handle File Upload elif file: - content = await file.read() filename = file.filename # ✅ Check if file is JSON - if filename.endswith(".json"): - try: - data = json.loads(content.decode("utf-8")) - graph_id = f"{request.state.user_id}_{data.get('database', '')}" - success, result = await JSONLoader.load(graph_id, data) - except json.JSONDecodeError: - raise HTTPException(status_code=400, detail="Invalid JSON file") + if filename.endswith(".json"): # pylint: disable=no-else-raise + raise HTTPException(status_code=501, detail="JSONLoader is not implemented yet") # ✅ Check if file is XML elif filename.endswith(".xml"): - xml_data = content.decode("utf-8") - graph_id = f"{request.state.user_id}_{filename.replace('.xml', '')}" - success, result = await ODataLoader.load(graph_id, xml_data) + raise HTTPException(status_code=501, detail="ODataLoader is not implemented yet") # ✅ Check if file is csv elif filename.endswith(".csv"): - csv_data = content.decode("utf-8") - graph_id = f"{request.state.user_id}_{filename.replace('.csv', '')}" - success, result = await CSVLoader.load(graph_id, csv_data) - + raise HTTPException(status_code=501, detail="CSVLoader is not implemented yet") else: raise HTTPException(status_code=415, detail="Unsupported file type") 
else: raise HTTPException(status_code=415, detail="Unsupported Content-Type") - # ✅ Return the final response - if success: - return JSONResponse(content={"message": "Graph loaded successfully", "graph_id": graph_id}) - - # Log detailed error but return generic message to user - logging.error("Graph loading failed: %s", str(result)[:100]) - raise HTTPException(status_code=400, detail="Failed to load graph data") - - -@graphs_router.post("/{graph_id}") +@graphs_router.post("/{graph_id}", operation_id="query_database") @token_required -async def query_graph(request: Request, graph_id: str, chat_data: ChatRequest): +async def query_graph(request: Request, graph_id: str, chat_data: ChatRequest): # pylint: disable=too-many-statements """ - text2sql + Query the Database with the given graph_id and chat_data. + + Args: + graph_id (str): The ID of the graph to query. + chat_data (ChatRequest): The chat data containing user queries and context. """ graph_id = _graph_name(request, graph_id) @@ -310,11 +287,11 @@ async def query_graph(request: Request, graph_id: str, chat_data: ChatRequest): memory_tool_task = asyncio.create_task(MemoryTool.create(request.state.user_id, graph_id)) # Create a generator function for streaming - async def generate(): + async def generate(): # pylint: disable=too-many-locals,too-many-branches,too-many-statements # Start overall timing overall_start = time.perf_counter() logging.info("Starting query processing pipeline for query: %s", - sanitize_query(queries_history[-1])) + sanitize_query(queries_history[-1])) # nosemgrep agent_rel = RelevancyAgent(queries_history, result_history) agent_an = AnalysisAgent(queries_history, result_history) @@ -328,7 +305,7 @@ async def generate(): db_description, db_url = await get_db_description(graph_id) # Determine database type and get appropriate loader - db_type, loader_class = get_database_type_and_loader(db_url) + _, loader_class = get_database_type_and_loader(db_url) if not loader_class: overall_elapsed = 
time.perf_counter() - overall_start @@ -366,7 +343,7 @@ async def generate(): "final_response": True, "message": "Off topic question: " + answer_rel["reason"], } - logging.info("SQL Fail reason: %s", answer_rel["reason"]) + logging.info("SQL Fail reason: %s", answer_rel["reason"]) # nosemgrep yield json.dumps(step) + MESSAGE_DELIMITER # Total time for off-topic query overall_elapsed = time.perf_counter() - overall_start @@ -377,7 +354,7 @@ async def generate(): result = await find_task logging.info("Calling to analysis agent with query: %s", - sanitize_query(queries_history[-1])) + sanitize_query(queries_history[-1])) # nosemgrep memory_tool = await memory_tool_task memory_context = await memory_tool.search_memories( query=queries_history[-1] @@ -393,10 +370,10 @@ async def generate(): follow_up_result = "" execution_error = False - logging.info("Generated SQL query: %s", answer_an['sql_query']) + logging.info("Generated SQL query: %s", answer_an['sql_query']) # nosemgrep yield json.dumps( { - "type": "final_result", + "type": "sql_query", "data": answer_an["sql_query"], "conf": answer_an["confidence"], "miss": answer_an["missing_information"], @@ -407,7 +384,7 @@ async def generate(): } ) + MESSAGE_DELIMITER - # If the SQL query is valid, execute it using the postgress database db_url + # If the SQL query is valid, execute it using the postgres database db_url if answer_an["is_sql_translatable"]: # Check if this is a destructive operation that requires confirmation sql_query = answer_an["sql_query"] @@ -469,7 +446,9 @@ async def generate(): return # Stop here and wait for user confirmation try: - step = {"type": "reasoning_step", "final_response": False, "message": "Step 2: Executing SQL query"} + step = {"type": "reasoning_step", + "final_response": False, + "message": "Step 2: Executing SQL query"} yield json.dumps(step) + MESSAGE_DELIMITER # Check if this query modifies the database schema using the appropriate loader @@ -555,24 +534,25 @@ async def generate(): 
overall_elapsed ) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught execution_error = str(e) overall_elapsed = time.perf_counter() - overall_start - logging.error("Error executing SQL query: %s", str(e)) + logging.error("Error executing SQL query: %s", str(e)) # nosemgrep logging.info( "Query processing failed during execution - Total time: %.2f seconds", overall_elapsed ) - yield json.dumps( - {"type": "error", "final_response": True, "message": "Error executing SQL query"} - ) + MESSAGE_DELIMITER + yield json.dumps({ + "type": "error", + "final_response": True, + "message": "Error executing SQL query" + }) + MESSAGE_DELIMITER else: execution_error = "Missing information" # SQL query is not valid/translatable - generate follow-up questions follow_up_result = follow_up_agent.generate_follow_up_question( user_question=queries_history[-1], - analysis_result=answer_an, - found_tables=result + analysis_result=answer_an ) # Send follow-up questions to help the user @@ -600,7 +580,7 @@ async def generate(): "generated_sql": answer_an.get('sql_query', ""), "answer": final_answer } - + # Add error information if SQL execution failed if execution_error: full_response["error"] = execution_error @@ -608,7 +588,7 @@ async def generate(): else: full_response["success"] = True - + # Save query to memory save_query_task = asyncio.create_task( memory_tool.save_query_memory( @@ -619,20 +599,23 @@ async def generate(): ) ) save_query_task.add_done_callback( - lambda t: logging.error(f"Query memory save failed: {t.exception()}") + lambda t: logging.error("Query memory save failed: %s", t.exception()) # nosemgrep if t.exception() else logging.info("Query memory saved successfully") ) - + # Save conversation with memory tool (run in background) save_task = asyncio.create_task(memory_tool.add_new_memory(full_response)) # Add error handling callback to prevent silent failures - save_task.add_done_callback(lambda t: logging.error(f"Memory save 
failed: {t.exception()}") if t.exception() else logging.info("Conversation saved to memory tool")) + save_task.add_done_callback( + lambda t: logging.error("Memory save failed: %s", t.exception()) # nosemgrep + if t.exception() else logging.info("Conversation saved to memory tool") + ) logging.info("Conversation save task started in background") - + # Clean old memory in background (once per week cleanup) clean_memory_task = asyncio.create_task(memory_tool.clean_memory()) clean_memory_task.add_done_callback( - lambda t: logging.error(f"Memory cleanup failed: {t.exception()}") + lambda t: logging.error("Memory cleanup failed: %s", t.exception()) # nosemgrep if t.exception() else logging.info("Memory cleanup completed successfully") ) @@ -672,13 +655,13 @@ async def confirm_destructive_operation( async def generate_confirmation(): # Create memory tool for saving query results memory_tool = await MemoryTool.create(request.state.user_id, graph_id) - + if confirmation == "CONFIRM": try: db_description, db_url = await get_db_description(graph_id) # Determine database type and get appropriate loader - db_type, loader_class = get_database_type_and_loader(db_url) + _, loader_class = get_database_type_and_loader(db_url) if not loader_class: yield json.dumps({ @@ -757,34 +740,40 @@ async def generate_confirmation(): # Save successful confirmed query to memory save_query_task = asyncio.create_task( memory_tool.save_query_memory( - query=queries_history[-1] if queries_history else "Destructive operation confirmation", + query=(queries_history[-1] if queries_history + else "Destructive operation confirmation"), sql_query=sql_query, success=True, error="" ) ) save_query_task.add_done_callback( - lambda t: logging.error(f"Confirmed query memory save failed: {t.exception()}") + lambda t: logging.error("Confirmed query memory save failed: %s", + t.exception()) # nosemgrep if t.exception() else logging.info("Confirmed query memory saved successfully") ) - except Exception as e: - 
logging.error("Error executing confirmed SQL query: %s", str(e)) - + except Exception as e: # pylint: disable=broad-exception-caught + logging.error("Error executing confirmed SQL query: %s", str(e)) # nosemgrep + # Save failed confirmed query to memory save_query_task = asyncio.create_task( memory_tool.save_query_memory( - query=queries_history[-1] if queries_history else "Destructive operation confirmation", + query=(queries_history[-1] if queries_history + else "Destructive operation confirmation"), sql_query=sql_query, success=False, error=str(e) ) ) save_query_task.add_done_callback( - lambda t: logging.error(f"Failed confirmed query memory save failed: {t.exception()}") - if t.exception() else logging.info("Failed confirmed query memory saved successfully") + lambda t: logging.error( # nosemgrep + "Failed confirmed query memory save failed: %s", t.exception() + ) if t.exception() else logging.info( + "Failed confirmed query memory saved successfully" + ) ) - + yield json.dumps( {"type": "error", "message": "Error executing query"} ) + MESSAGE_DELIMITER @@ -811,45 +800,23 @@ async def refresh_graph_schema(request: Request, graph_id: str): graph_id = _graph_name(request, graph_id) try: - # Get database connection details + # Get database description and URL _, db_url = await get_db_description(graph_id) if not db_url or db_url == "No URL available for this database.": - return JSONResponse({ - "success": False, - "error": "No database URL found for this graph" - }, status_code=400) + raise HTTPException(status_code=404, detail="No database URL found for this graph") - # Determine database type and get appropriate loader - db_type, loader_class = get_database_type_and_loader(db_url) + # Create a database connection request with the stored URL + db_request = DatabaseConnectionRequest(url=db_url) - if not loader_class: - return JSONResponse({ - "success": False, - "error": "Unable to determine database type" - }, status_code=400) - - # Perform schema refresh using 
the appropriate loader - success, message = await loader_class.refresh_graph_schema(graph_id, db_url) - - if success: - return JSONResponse({ - "success": True, - "message": f"Graph schema refreshed successfully using {db_type}" - }) - - logging.error("Schema refresh failed for graph %s: %s", graph_id, message) - return JSONResponse({ - "success": False, - "error": "Failed to refresh schema" - }, status_code=500) + # Call connect_database to refresh the schema by reconnecting + return await connect_database(request, db_request) + except HTTPException: + raise except Exception as e: - logging.error("Error in manual schema refresh: %s", e) - return JSONResponse({ - "success": False, - "error": "Error refreshing schema" - }, status_code=500) + logging.error("Error in refresh_graph_schema: %s", str(e)) + raise HTTPException(status_code=500, detail="Internal server error while refreshing schema") # pylint: disable=raise-missing-from @graphs_router.delete("/{graph_id}") @token_required @@ -871,6 +838,6 @@ async def delete_graph(request: Request, graph_id: str): except ResponseError: return JSONResponse(content={"error": "Failed to delete graph, Graph not found"}, status_code=404) - except Exception as e: + except Exception as e: # pylint: disable=broad-exception-caught logging.exception("Failed to delete graph %s: %s", sanitize_log_input(namespaced), e) return JSONResponse(content={"error": "Failed to delete graph"}, status_code=500) diff --git a/api/routes/tokens.py b/api/routes/tokens.py new file mode 100644 index 00000000..180a011a --- /dev/null +++ b/api/routes/tokens.py @@ -0,0 +1,153 @@ +"""Token management routes for the QueryWeaver API.""" + +import logging +import secrets +from typing import List + +from fastapi import APIRouter, Request, HTTPException, status +from fastapi.responses import JSONResponse +from pydantic import BaseModel + +from api.auth.user_management import token_required +from api.extensions import db + + +# Router +tokens_router = APIRouter() 
+ +class TokenListItem(BaseModel): + """Response model for token list items""" + token_id: str + created_at: int + +class TokenListResponse(BaseModel): + """Response model for token list""" + tokens: List[TokenListItem] + +@tokens_router.post("/generate", response_model=TokenListItem) +@token_required +async def generate_token(request: Request) -> TokenListItem: + """Generate a new API token for the authenticated user""" + try: + user_email = request.state.user_email + + # Call the registered callback handler if it exists to store user data. + handler = getattr(request.app.state, "callback_handler", None) + if handler: + api_token = secrets.token_urlsafe(32) # ~43 chars, hard to guess + + user_data = { + "id": "0", + "email": user_email, + "name": "token token", + "picture": "" + } + + # Call the registered handler (await if async) + await handler('api', user_data, api_token) + + logging.info("Token generated for user: %s", user_email) # nosemgrep + + return TokenListItem( + token_id=api_token, + created_at=0 # Real timestamp is set by auth system in graph DB + ) + + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Failed to generate token" + ) + + except HTTPException: + raise + except Exception as e: + logging.error("Error generating token: %s", e) # nosemgrep + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error" + ) from e + +@tokens_router.get("/list", response_model=TokenListResponse) +@token_required +async def list_tokens(request: Request) -> TokenListResponse: + """List all tokens for the authenticated user""" + try: + user_email = request.state.user_email + + # Get tokens from Organizations graph + organizations_graph = db.select_graph("Organizations") + + # Get user information by API token and then get all associated tokens that connected + # to the Identity of provider='api' + query = """ + MATCH(:Identity {email:$user_email,
provider:'api'})-[:HAS_TOKEN]->(token:Token) + RETURN token.id, token.created_at + """ + + result = await organizations_graph.query(query, {"user_email": user_email}) + + tokens = [] + if result.result_set: + for row in result.result_set: + tokens.append(TokenListItem( + token_id=row[0][-4:], # last 4 chars in the token_id str + created_at=row[1], + )) + + return TokenListResponse(tokens=tokens) + + except HTTPException: + raise + except Exception as e: + logging.error("Error listing tokens: %s", e) # nosemgrep + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error" + ) from e + +@tokens_router.delete("/{token_id}") +@token_required +async def delete_token(request: Request, token_id: str) -> JSONResponse: + """Delete a specific token for the authenticated user""" + try: + user_email = request.state.user_email + + # Delete token from Organizations graph + organizations_graph = db.select_graph("Organizations") + + # Delete the token + delete_query = """ + MATCH (user:Identity {email:$user_email, provider:'api'})-[:HAS_TOKEN]->(token:Token) + WHERE RIGHT(token.id, 4)=$token_id + DELETE token + RETURN COUNT(*) AS deleted_count + """ + + result = await organizations_graph.query(delete_query, { + "user_email": user_email, + "token_id": token_id + }) + + # Sanitize token_id to prevent log injection + sanitized_token_id = token_id.replace('\n', ' ').replace('\r', ' ') if token_id else 'Unknown' # pylint: disable=line-too-long + logging.info("Token deleted for user %s: token_id=%s", user_email, sanitized_token_id) # nosemgrep pylint: disable=line-too-long + + if result.result_set and result.result_set[0][0] > 0: + return JSONResponse( + status_code=status.HTTP_200_OK, + content={"message": "Token deleted successfully"} + ) + + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Token not found" + ) + + except HTTPException: + raise + except Exception as e: + logging.error("Error deleting token: %s", e) 
# nosemgrep + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Internal server error" + ) from e diff --git a/api/schema_aba.json b/api/schema_aba.json deleted file mode 100644 index 422bff52..00000000 --- a/api/schema_aba.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "MySQL Database Schema with Descriptions", - "description": "JSON Schema for MySQL database structure including table and column descriptions", - "type": "object", - "additionalProperties": false, - "required": ["database", "tables"], - "properties": { - "database": { - "type": "string", - "description": "Name of the database", - "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$" - }, - "tables": { - "type": "object", - "description": "Collection of database tables", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "description": "Table definition", - "required": ["description", "columns", "foreign_keys"], - "additionalProperties": false, - "properties": { - "description": { - "type": "string", - "description": "Detailed description of the table's purpose and usage" - }, - "columns": { - "type": "object", - "description": "Collection of table columns", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "required": ["description"], - "additionalProperties": false, - "properties": { - "description": { - "type": "string", - "description": "Detailed description of the column's purpose and usage" - } - } - } - }, - "additionalProperties": false, - "minProperties": 1 - }, - "foreign_keys": { - "type": "object", - "description": "Collection of foreign key constraints", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "required": ["column", "referenced_table", "referenced_column"], - "additionalProperties": false, - "properties": { - "column": { - "type": "string", - "description": "Local column name" - }, - "referenced_table": { - "type": 
"string", - "description": "Referenced table name" - }, - "referenced_column": { - "type": "string", - "description": "Referenced column name" - } - } - } - }, - "additionalProperties": false - } - } - } - }, - "additionalProperties": false, - "minProperties": 1 - } - } -} \ No newline at end of file diff --git a/api/schema_schema.json b/api/schema_schema.json deleted file mode 100644 index 55a00520..00000000 --- a/api/schema_schema.json +++ /dev/null @@ -1,152 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "MySQL Database Schema with Descriptions", - "description": "JSON Schema for MySQL database structure including table and column descriptions", - "type": "object", - "additionalProperties": false, - "required": ["database", "tables"], - "properties": { - "database": { - "type": "string", - "description": "Name of the database", - "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$" - }, - "tables": { - "type": "object", - "description": "Collection of database tables", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "description": "Table definition", - "required": ["description", "columns", "indexes", "foreign_keys"], - "additionalProperties": false, - "properties": { - "description": { - "type": "string", - "description": "Detailed description of the table's purpose and usage" - }, - "columns": { - "type": "object", - "description": "Collection of table columns", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "required": ["description", "type", "null", "key", "default", "extra"], - "additionalProperties": false, - "properties": { - "description": { - "type": "string", - "description": "Detailed description of the column's purpose and usage" - }, - "type": { - "type": "string", - "description": "SQL data type", - "examples": ["int(11)", "varchar(255)", "text", "timestamp"] - }, - "null": { - "type": "string", - "enum": ["YES", "NO"], - "description": "Whether the column can be NULL" - 
}, - "key": { - "type": "string", - "enum": ["", "PRI", "UNI", "MUL"], - "description": "Key type (PRI=primary, UNI=unique, MUL=index)" - }, - "default": { - "type": ["string", "null"], - "description": "Default value for the column" - }, - "extra": { - "type": "string", - "description": "Additional column attributes", - "examples": ["auto_increment", "on update CURRENT_TIMESTAMP"] - } - } - } - }, - "additionalProperties": false, - "minProperties": 1 - }, - "indexes": { - "type": "object", - "description": "Collection of table indexes", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "required": ["columns", "unique", "type"], - "additionalProperties": false, - "properties": { - "columns": { - "type": "array", - "description": "Columns included in the index", - "minItems": 1, - "items": { - "type": "object", - "required": ["name", "sub_part", "seq_in_index"], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "Name of the indexed column" - }, - "sub_part": { - "type": ["integer", "null"], - "description": "Length of indexed prefix if partial index" - }, - "seq_in_index": { - "type": "integer", - "minimum": 1, - "description": "Position of column in multi-column index" - } - } - } - }, - "unique": { - "type": "boolean", - "description": "Whether this is a unique index" - }, - "type": { - "type": "string", - "enum": ["BTREE", "HASH"], - "description": "Type of index" - } - } - } - }, - "additionalProperties": false - }, - "foreign_keys": { - "type": "object", - "description": "Collection of foreign key constraints", - "patternProperties": { - "^[a-zA-Z_][a-zA-Z0-9_]*$": { - "type": "object", - "required": ["column", "referenced_table", "referenced_column"], - "additionalProperties": false, - "properties": { - "column": { - "type": "string", - "description": "Local column name" - }, - "referenced_table": { - "type": "string", - "description": "Referenced table name" - }, - 
"referenced_column": { - "type": "string", - "description": "Referenced column name" - } - } - } - }, - "additionalProperties": false - } - } - } - }, - "additionalProperties": false, - "minProperties": 1 - } - } -} \ No newline at end of file diff --git a/api/utils.py b/api/utils.py index d4f6ba8c..14dd09d6 100644 --- a/api/utils.py +++ b/api/utils.py @@ -1,12 +1,10 @@ """Utility functions for the text2sql API.""" -import json -from typing import List, Tuple +from typing import List from litellm import completion from api.config import Config -from api.constants import BENCHMARK def generate_db_description( @@ -67,128 +65,3 @@ def generate_db_description( ) description = response.choices[0].message["content"] return description - - -def llm_answer_validator(question: str, answer: str, expected_answer: str = None) -> str: - """ - Validate an answer using LLM. - - Args: - question: The original question - answer: The generated answer - expected_answer: The expected answer for comparison - - Returns: - JSON string with validation results - """ - prompt = """ - You are evaluating an answer generated by a text-to-sql RAG-based system. Assess how well the Generated Answer (generated sql) addresses the Question - based on the Expected Answer. - - Question: - {question} - - Expected Answer: - {expected_answer} - - Generated Answer: - {generated_answer} - - Provide a relevance score from 0 to 1 (1 being a perfect response) and justify your reasoning in a concise explanation. 
- Output Json format: - {{"relevance_score": float, "explanation": "Your assessment here."}} - """ - response = completion( - model=Config.VALIDATOR_MODEL, - messages=[ - {"role": "system", "content": "You are a Validator assistant."}, - { - "role": "user", - "content": prompt.format( - question=question, - expected_answer=expected_answer, - generated_answer=answer, - ), - }, - ], - response_format={"type": "json_object"}, - ) - validation_set = response.choices[0].message["content"].strip() - return validation_set - - -def llm_table_validator(question: str, answer: str, tables: List[str]) -> Tuple[float, str]: - """ - Validate table relevance using LLM. - - Args: - question: The original question - answer: The generated answer - tables: List of available tables - - Returns: - Tuple of relevance score and explanation - """ - prompt = """ - You are evaluating an answer generated by a text-to-sql RAG-based system. Assess how well the retrived Tables relevant to the question and supports the Generated Answer (generated sql). - - The tables are with the following structure: - {{"schema": [["table_name", description, [{{"column_name": "column_description", "data_type": "data_type",...}},...]],...]}} - - Question: - {question} - - Tables: - {tables} - - Generated Answer: - {generated_answer} - - Provide a relevance score from 0 to 1 (1 being a perfect response) and justify your reasoning in a concise explanation. 
- Output Json format: - {{"relevance_score": float, "explanation": "Your assessment here."}} - """ - response = completion( - model=Config.VALIDATOR_MODEL, - messages=[ - {"role": "system", "content": "You are a Validator assistant."}, - { - "role": "user", - "content": prompt.format(question=question, tables=tables, generated_answer=answer), - }, - ], - response_format={"type": "json_object"}, - ) - validation_set = response.choices[0].message["content"].strip() - try: - val_res = json.loads(validation_set) - score = val_res["relevance_score"] - explanation = val_res["explanation"] - except (json.JSONDecodeError, KeyError) as e: - print(f"Error: {e}") - score = 0.0 - explanation = "Error: Unable to parse the response." - - return score, explanation - - -def run_benchmark(): - """ - Run the benchmark for the text2sql module. - """ - # Load the benchmark data - benchmark_data = BENCHMARK - - # Initialize the benchmark results - results = [] - - for data in benchmark_data: - success, result = generate_db_description( - db_name=data["database"], table_names=list(data["tables"].keys()) - ) - - if success: - results.append(result) - else: - results.append(f"Error: {result}") - - return results diff --git a/app/public/css/base.css b/app/public/css/base.css index 3fe58204..811a79fb 100644 --- a/app/public/css/base.css +++ b/app/public/css/base.css @@ -11,19 +11,26 @@ src: url('fonts/fira_code.ttf'); } +@font-face { + font-family: 'Inter'; + src: url('fonts/inter.ttf'); +} + +* { + font-family: 'Inter' !important; +} + +.query-text { + font-family: 'Fira Code' !important; +} + body { - font-family: 'Fira Code', monospace; background-color: var(--falkor-secondary); display: flex; flex-direction: column; color: var(--text-primary); } -/* Ensure all form elements inherit the consistent font */ -button, input, select, textarea { - font-family: inherit; -} - /* Scrollbar Styles */ ::-webkit-scrollbar { width: 20px; diff --git a/app/public/css/buttons.css 
b/app/public/css/buttons.css index 28602d48..e7a7b8a9 100644 --- a/app/public/css/buttons.css +++ b/app/public/css/buttons.css @@ -119,22 +119,35 @@ font-size: 22px; } -.user-profile-logout { - width: 100%; - padding: 10px; +#user-profile-logout { background: #D32F2F; color: #fff; - border: none; - border-radius: 6px; +} + +#user-profile-logout:hover { + background: #B71C1C; +} + +.user-profile-action { cursor: pointer; + border-radius: 4px; + transition: all 0.2s ease; + width: 100%; + padding: 10px; font-weight: bold; - transition: background 0.2s; + border: none; + color: var(--text-primary); } -.user-profile-logout:hover { - background: #B71C1C; +#api-tokens-btn { + background: var(--falkor-quaternary); +} + +#api-tokens-btn:hover { + background: var(--bg-tertiary); } + /* Theme Toggle Button Styles */ #theme-toggle-btn { position: static; @@ -240,7 +253,6 @@ padding: 12px 24px; border: none; border-radius: 6px; - font-family: 'Fira Code', monospace; font-size: 14px; font-weight: bold; cursor: pointer; @@ -353,11 +365,44 @@ align-items: center; } +/* Footer: pin to bottom of the toolbar */ +#left-toolbar-inner { + /* allow the inner area to grow so footer can sit at the bottom */ + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + width: 100%; + flex: 1 1 auto; +} + #left-toolbar-footer { - margin-top: 8px; + margin-top: auto; /* push footer to bottom */ width: 100%; - height: 6px; flex-shrink: 0; + display: flex; + align-items: center; + justify-content: center; + padding: 10px 0; + box-sizing: border-box; +} + +#toolbar-footer-buttons { + display: flex; + flex-direction: column; + gap: 8px; + width: 100%; + align-items: center; + justify-content: center; +} + +.toolbar-footer-button { + width: 40px; + height: 40px; + border-radius: 8px; + display: flex; + align-items: center; + justify-content: center; } /* Collapsed state: only show the burger button. Remove background/border/shadow so the bar is unobtrusive. 
*/ diff --git a/app/public/css/chat-components.css b/app/public/css/chat-components.css index df098210..4a62d761 100644 --- a/app/public/css/chat-components.css +++ b/app/public/css/chat-components.css @@ -203,6 +203,18 @@ border: none; font-size: 18px !important; font-weight: 500 !important; + resize: none; + min-height: 24px; + height: 24px; + max-height: calc(24px * 5); + overflow-y: auto; + line-height: 24px; + scrollbar-width: none; + -ms-overflow-style: none; +} + +#message-input::-webkit-scrollbar { + display: none; } #message-input:focus { diff --git a/app/public/css/fonts/inter.ttf b/app/public/css/fonts/inter.ttf new file mode 100644 index 00000000..e31b51e3 Binary files /dev/null and b/app/public/css/fonts/inter.ttf differ diff --git a/app/public/css/landing.css b/app/public/css/landing.css deleted file mode 100644 index 08acab18..00000000 --- a/app/public/css/landing.css +++ /dev/null @@ -1,331 +0,0 @@ -.landing-container { - max-width: 1200px; - margin: 3rem auto; - display: grid; - grid-template-columns: 1fr 500px; - gap: 2.5rem; - align-items: center; - padding: 0 1rem; -} - -/* Site header */ -.site-header { - width: 100%; - background: transparent; - border-bottom: solid 1px var(--falkor-border-secondary); -} - -.site-header-inner { - padding: 1rem; - display: inline-flex; - align-items: center; - gap: 0.4rem; - color: var(--text-primary); - text-decoration: none; - font-weight: 700; - padding-left: 0.25rem; -} - -.site-header-inner img { - height: 40px; - width: auto; - display: block; -} -.site-title { - font-size: 0.95rem; - display: none; -} - -.hero-left { - padding: 1rem 0; -} - -.hero-title { - font-family: 'Inter', system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial; - font-weight: 800; - font-size: 4rem; - line-height: 1.02; - margin: 0 0 1rem 0; - color: var(--text-primary); -} - -.hero-sub { - color: var(--text-secondary); - font-size: 1.05rem; - max-width: 44rem; - margin-bottom: 1.5rem; -} - -.hero-ctas { - display: 
flex; - gap: 1rem; - align-items: center; -} - -.btn-pill { - display: inline-block; - padding: 0.9rem 1.25rem; - border-radius: 999px; - background: var(--falkor-primary); - color: #fff; - text-decoration: none; - box-shadow: 0 8px 20px rgba(91, 107, 192, 0.14); - transition: transform 140ms ease, box-shadow 140ms ease, filter 140ms ease; - cursor: pointer; -} - -.btn-ghost { - display: inline-block; - padding: 0.9rem 1.25rem; - border-radius: 999px; - background: transparent; - color: var(--text-primary); - text-decoration: none; - border: 1px solid var(--falkor-border-tertiary); - box-shadow: 0 6px 14px rgba(11, 18, 32, 0.06); - transition: background 0.12s ease, border-color 0.12s ease, box-shadow 0.12s ease; -} - -.btn-ghost:hover { - background: rgba(255, 255, 255, 0.02); - border-color: var(--border-color); - box-shadow: 0 10px 24px rgba(11, 18, 32, 0.08); -} - -.btn-ghost:focus { - outline: none; - box-shadow: 0 0 0 3px rgba(91, 107, 192, 0.08); -} - -.btn-pill:hover { - transform: translateY(-4px); - box-shadow: 0 18px 36px rgba(91, 107, 192, 0.18); -} - -.btn-pill:active { - transform: translateY(-1px) scale(0.997); -} - -.btn-pill:focus { - outline: none; - box-shadow: 0 0 0 4px rgba(59,130,246,0.12); -} - -.demo-card { - background: var(--bg-tertiary); - border-radius: 12px; - box-shadow: 0 16px 30px rgba(11, 18, 32, 0.06); - padding: 1rem; - border: 1px solid var(--border-color); -} - -/* Use a neutral themed surface for the inner area so it adapts to light/dark */ -.demo-inner { - border-radius: 8px; - padding: 1rem; - border: 1px solid var(--falkor-border-tertiary); - border: none; -} - -.demo-label { - font-size: 0.9rem; - color: var(--text-secondary); - margin-bottom: 0.5rem; -} - -.demo-sql-header { - display: flex; - justify-content: space-between; - align-items: center; - margin-top: 1rem; -} - -.demo-success { - display: flex; - align-items: center; - gap: 0.5rem; - color: #10B981; - font-size: 0.9rem; - font-weight: 600; -} - -.demo-success 
svg { - flex-shrink: 0; -} - -/* Use the theme's secondary surface for the white/black boxes so text contrast is correct in both themes */ -.demo-question { - background: var(--falkor-secondary); - border-radius: 6px; - padding: 0.75rem 1rem; - border: 1px solid var(--falkor-border-tertiary); - color: var(--text-primary); - height: 120px; - white-space: pre-wrap; - font-family: monospace; - font-size: 0.95rem; - overflow: auto; - line-height: 1.3; -} - -.demo-sql { - background: var(--falkor-secondary); - border-radius: 6px; - padding: 0.75rem 1rem; - border: 1px solid var(--falkor-border-tertiary); - color: var(--text-primary); - margin-top: 0.5rem; - font-size: 0.9rem; - overflow: auto; - height: 200px; - line-height: 1.25; -} - -/* SQL token colors for demo code */ -.demo-sql .sql-keyword { color: #7c3aed; font-weight: 700; } -.demo-sql .sql-string { color: #059669; } -.demo-sql .sql-func { color: #2563eb; } -.demo-sql .sql-number { color: #b45309; } - -.demo-sql { white-space: pre-wrap; font-family: monospace; } - -.demo-sql.typing { - position: relative; -} - -.demo-sql.typing::after { - content: ''; - display: inline-block; - width: 10px; - height: 1.1em; - background: var(--falkor-primary); - margin-left: 6px; - vertical-align: bottom; - animation: blink-caret 1s steps(1) infinite; -} - -@keyframes blink-caret { - 0%, 50% { - opacity: 1; - } - - 51%, 100% { - opacity: 0; - } -} - -.demo-cta { - margin-top: 1rem; - text-align: center; -} - -.demo-cta .btn-full { - display: inline-block; - width: 100%; - padding: 0.75rem; - border-radius: 8px; - background: #e7f1ff; - color: var(--falkor-primary); - text-decoration: none; - border: none; -} - -.demo-cta .btn-full:hover { - transform: translateY(-3px); - box-shadow: 0 14px 30px rgba(11,18,32,0.12); - background: #d9ecff; -} - -.demo-cta .btn-full:active { - transform: translateY(-1px) scale(0.998); -} - -.demo-cta .btn-full:focus { - outline: none; - box-shadow: 0 0 0 4px rgba(59,130,246,0.08); -} - -@media 
(max-width: 900px) { - .landing-container { - grid-template-columns: 1fr; - gap: 1.25rem; - } - - .hero-title { - font-size: 2.4rem; - } -} - -/* Feature boxes row */ -.features-row { - display: flex; - gap: 1rem; - margin-top: 4.5rem; - align-items: stretch; - justify-content: center; -} - -.feature-card { - width: 280px; - background: var(--bg-tertiary); - border-radius: 6px; - padding: 0.9rem 1rem; - border: 1px solid var(--falkor-border-tertiary); - box-shadow: 0 8px 20px rgba(11, 18, 32, 0.04); - text-align: center; - transition: transform 180ms ease, box-shadow 180ms ease, border-color 180ms ease; - border: none; -} - -.feature-card:hover { - transform: translateY(-6px); - box-shadow: 0 20px 40px rgba(11, 18, 32, 0.18); - border: solid 1px var(--falkor-border-secondary); -} - -.feature-card .feature-icon { - width: 36px; - height: 36px; - display: inline-flex; - align-items: center; - justify-content: center; - border-radius: 999px; - background: rgba(59,130,246,0.06); - margin: 0 auto 0.6rem auto; -} - -.feature-card:hover .feature-icon { - background: rgba(59,130,246,0.12); -} - -.feature-title { - font-size: 0.9rem; - margin: 0 0 0.35rem 0; - color: var(--text-primary); - font-weight: 700; -} - -.feature-desc { - font-size: 0.82rem; - color: var(--text-secondary); - margin: 0; - line-height: 1.3; -} - -.feature-highlight { - border: 1px solid rgba(59,130,246,0.15); -} - -@media (max-width: 900px) { - .features-row { - flex-direction: column; - gap: 0.75rem; - margin-top: 2rem; - align-items: center; - } - - .feature-card { - width: 100%; - max-width: 520px; - } -} diff --git a/app/public/css/layout.css b/app/public/css/layout.css index 84824ed9..e30f7836 100644 --- a/app/public/css/layout.css +++ b/app/public/css/layout.css @@ -9,7 +9,7 @@ } .logo { - height: 60px; + height: 240px; width: auto; } diff --git a/app/public/css/menu.css b/app/public/css/menu.css index 91d6a85c..2c8bb996 100644 --- a/app/public/css/menu.css +++ b/app/public/css/menu.css @@ 
-92,35 +92,7 @@ gap: 10px; } -.dropdown-container { - display: flex; - align-items: center; - gap: 5px; -} - -.dropdown-container select { - height: 100%; - padding: 8px 12px; - border: 1px solid var(--border-color); - border-radius: 6px; - font-size: 14px; - background: var(--falkor-quaternary); - color: var(--text-primary); - cursor: pointer; - transition: all 0.2s ease; -} - -.dropdown-container select:hover { - border-color: var(--falkor-primary); -} - -.dropdown-container select:focus { - outline: none; - border-color: var(--falkor-primary); - box-shadow: 0 0 0 2px rgba(66, 133, 244, 0.2); -} - -.dropdown-container button { +.header-button { height: 100%; padding: 8px 16px; border: 1px solid var(--border-color); @@ -132,50 +104,86 @@ transition: all 0.2s ease; } -.dropdown-container button:hover:not(:disabled) { +.header-button:hover:not(:disabled) { background: var(--falkor-primary); color: white; } -.dropdown-container button:disabled { +.header-button:disabled { background: var(--bg-tertiary); color: var(--text-secondary); cursor: not-allowed; border-color: var(--text-secondary); } -#graph-select { - height: 100%; - padding: 8px 12px; - border-radius: 6px; - border: 1px solid var(--text-primary); - font-size: 14px; - background-color: var(--falkor-secondary); - color: var(--text-primary); - transition: border-color 0.2s; - min-width: 180px; - appearance: none; - background-image: linear-gradient(45deg, transparent 50%, var(--text-secondary) 50%), - linear-gradient(135deg, var(--text-secondary) 50%, transparent 50%); - background-position: calc(100% - 20px) center, calc(100% - 15px) center; - background-size: 5px 5px, 5px 5px; - background-repeat: no-repeat; - cursor: pointer; +#graph-select-refresh { + display: flex; + align-items: center; + justify-content: center; +} + +#graph-select-refresh.loading svg { + animation: spin 0.5s linear infinite; + opacity: 0.5; +} + +#graph-select-refresh svg { + height: 20px; + width: 20px; +} + +#query-final-result-table 
{ + border-collapse: collapse; +} + +#query-final-result-table th, +#query-final-result-table td { + padding: 4px 8px; +} + +#query-final-result-table th:not(:first-child), +#query-final-result-table td:not(:first-child) { + border-left: 1px solid var(--text-primary); +} + +#query-final-result-table td { + border-top: 1px solid var(--text-primary); } -/* Graph canvas container inside schema sidebar */ #schema-graph { width: 100%; - height: 100%; - min-height: 300px; - background: transparent; + height: 100%; +} + +#schema-content { position: relative; + width: 100%; + height: 100%; } -/* Ensure canvas inside ForceGraph fills the container */ -.force-graph-container, #schema-graph > canvas { - width: 100% !important; - height: 100% !important; +#schema-controls { + position: absolute; + padding: 10px; + bottom: 15px; + left: 0px; + display: flex; + flex-direction: row; + align-items: center; + justify-content: center; + gap: 10px; + z-index: 10; + pointer-events: none; +} + +#schema-controls button { + display: flex; + align-items: center; + justify-content: center; + color: var(--text-primary); + background-color: transparent; + pointer-events: auto; + border: none; + cursor: pointer; } #graph-select:focus { @@ -234,7 +242,7 @@ } /* Custom Dropdown Styles */ -.custom-dropdown { +.header-button { position: relative; display: inline-block; min-width: 140px; @@ -277,10 +285,6 @@ transition: transform 0.2s ease; } -.custom-dropdown.open .dropdown-arrow { - transform: rotate(180deg); -} - .dropdown-options { position: absolute; top: 100%; @@ -299,10 +303,51 @@ display: none; } +/* Show dropdown options when parent has .open */ +.custom-dropdown { + position: relative; + display: inline-block; +} + .custom-dropdown.open .dropdown-options { display: block; } +.custom-dropdown.open .dropdown-arrow { + transform: rotate(180deg); +} + +/* Make the database-type dropdown match the input width and add gap */ +#database-type-dropdown { + display: block; + /* behave like a block 
element so width:100% applies */ + width: 100%; + box-sizing: border-box; + margin: 0 0 12px 0; + /* gap between dropdown and input; prevent horizontal centering */ +} + +#database-type-dropdown .dropdown-selected { + width: 100%; + /* keep flex layout so text and arrow align on one line */ + display: flex; + align-items: center; + justify-content: space-between; +} + +/* Match modal input sizing for a consistent look */ +#database-type-dropdown .dropdown-selected { + padding: 0.6em; + font-size: 1em; + border-radius: 6px; +} + +#database-type-dropdown .dropdown-options { + left: 0; + right: 0; + /* ensure options fill the same width */ +} + .dropdown-option { padding: 8px 12px; cursor: pointer; @@ -333,7 +378,7 @@ } /* Graph custom dropdown (moved from chat_header.j2 inline styles) */ -.graph-custom-dropdown { +.graph-header-button { position: relative; display: inline-block; width: 180px; @@ -341,6 +386,7 @@ } .graph-selected { + height: 100%; padding: 8px 14px; border-radius: 6px; background: var(--falkor-quaternary); @@ -354,18 +400,17 @@ min-width: 160px; box-sizing: border-box; font-size: 14px; + gap: 4px; } .graph-options { position: absolute; - top: calc(100%); - left: 0; - right: 0; + top: calc(100% + 20px); + left: -20%; + right: -20%; background: var(--falkor-secondary); border: 1px solid var(--border-color); border-radius: 6px; - border-top-left-radius: 0; - border-top-right-radius: 0; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); max-height: 260px; overflow: auto; @@ -373,6 +418,11 @@ z-index: 50; } +.graph-custom-dropdown { + height: 100%; + position: relative; +} + .dropdown-option { display: flex; align-items: center; diff --git a/app/public/css/modals.css b/app/public/css/modals.css index 7f68e23f..1180fcc4 100644 --- a/app/public/css/modals.css +++ b/app/public/css/modals.css @@ -1,6 +1,6 @@ /* Modal and Popup Components */ -.db-modal { +#db-modal { display: none; position: fixed; top: 0; @@ -33,7 +33,7 @@ font-size: 1em; border: 1px solid 
var(--border-color); border-radius: 6px; - margin-bottom: 1.5em; + margin: 0 0 12px 0; /* consistent gap under elements */ color: var(--text-primary); background: var(--falkor-quaternary); } @@ -108,7 +108,7 @@ border: 1px solid var(--text-secondary); } -.google-login-modal { +.login-modal { display: none; position: fixed; top: 0; @@ -121,7 +121,7 @@ justify-content: center; } -.google-login-modal-content { +.login-modal-content { background: var(--falkor-secondary); padding: 2em 3em; border-radius: 10px; @@ -130,18 +130,123 @@ color: var(--text-primary); } -.google-login-modal-content h2 { +.login-modal-content h2 { color: var(--text-primary); margin-bottom: 0.5em; font-size: 1.5em; } -.google-login-modal-content p { +.login-modal-content p { color: var(--text-secondary); margin-bottom: 1em; font-size: 1em; } +/* Email Authentication Styles */ +.auth-divider { + margin: 1.5em 0; + position: relative; + text-align: center; +} + +.auth-divider::before { + content: ''; + position: absolute; + top: 50%; + left: 0; + right: 0; + height: 1px; + background: var(--border-color); +} + +.auth-divider span { + background: var(--falkor-secondary); + color: var(--text-secondary); + padding: 0 1em; + font-size: 0.9em; +} + +.email-auth-form { + display: flex; + flex-direction: column; + gap: 1em; + margin-top: 1em; +} + +.auth-input { + width: 100%; + padding: 0.75em; + border: 1px solid var(--border-color); + border-radius: 6px; + font-size: 1em; + background: var(--falkor-quaternary); + color: var(--text-primary); + box-sizing: border-box; +} + +.auth-input:focus { + outline: none; + border-color: #4285F4; + box-shadow: 0 0 0 2px rgba(66, 133, 244, 0.2); +} + +.auth-input::placeholder { + color: var(--text-secondary); +} + +.email-login-btn, .email-signup-btn { + width: 100%; + padding: 0.75em; + border: none; + border-radius: 6px; + font-size: 1.1em; + font-weight: 500; + cursor: pointer; + transition: background 0.2s; + margin-top: 0.5em; +} + +.email-login-btn { + 
background: #5B6BC0; + color: #fff; +} + +.email-login-btn:hover { + background: #4F5BA7; +} + +.email-signup-btn { + background: #5B6BC0; + color: #fff; +} + +.email-signup-btn:hover { + background: #4F5BA7; +} + +.email-login-btn:disabled, .email-signup-btn:disabled { + background: #cccccc; + color: #888888; + cursor: not-allowed; +} + +.auth-footer { + margin-top: 1.5em; + text-align: center; + color: var(--text-secondary); + font-size: 0.9em; +} + +.signup-link { + color: #4285F4; + text-decoration: none; + font-weight: 500; +} + +.signup-link:hover { + text-decoration: underline; +} + /* User Profile Dropdown */ .user-profile-dropdown { position: fixed; @@ -178,6 +283,9 @@ .user-profile-actions { padding: 10px; + display: flex; + flex-direction: column; + gap: 8px; /* space between API Tokens and Logout */ } /* Destructive Confirmation Styles */ @@ -247,7 +355,280 @@ .reset-confirmation-modal-content p { color: var(--text-secondary); - margin-bottom: 1.5em; font-size: 1em; line-height: 1.5; } + +/* Token Management Modal */ +.modal { + display: none; + position: fixed; + top: 0; + left: 0; + width: 100vw; + height: 100vh; + background: rgba(0,0,0,0.6); + z-index: 3000; + align-items: center; + justify-content: center; +} + +.modal .modal-content { + background: var(--falkor-secondary); + padding: 0; + border-radius: 10px; + box-shadow: 0 2px 16px rgba(0,0,0,0.2); + color: var(--text-primary); + min-width: 600px; + max-width: 90vw; + max-height: 90vh; + overflow: hidden; + display: flex; + flex-direction: column; +} + +.modal .modal-header { + background: var(--falkor-primary); + padding: 1.5em 2em; + border-bottom: 1px solid var(--border-color); + display: flex; + justify-content: space-between; + align-items: center; +} + +.modal .modal-header h2 { + margin: 0; + color: var(--text-primary); + font-size: 1.4em; +} + +.modal .close-btn { + background: none; + border: none; + font-size: 3em; + color: var(--text-secondary); + cursor: pointer; + padding: 0; + 
line-height: 1; +} + +.modal .close-btn:hover { + color: var(--text-primary); +} + +.modal .modal-body { + padding: 2em; + overflow-y: auto; + flex: 1; +} + +.tokens-description { + color: var(--text-secondary); + margin-bottom: 2em; + line-height: 1.6; +} + +.tokens-actions { + margin-bottom: 2em; +} + +.btn { + padding: 0.8em 1.5em; + border: none; + border-radius: 6px; + cursor: pointer; + font-size: 0.9em; + font-weight: 500; + transition: all 0.2s ease; + text-decoration: none; + display: inline-block; + text-align: center; +} + +.btn:disabled { + opacity: 0.6; + cursor: not-allowed; +} + +.btn-primary { + background: var(--accent-color); + color: white; +} + +.btn-primary:hover:not(:disabled) { + background: var(--accent-hover); +} + +.btn-secondary { + background: var(--bg-tertiary); + color: var(--text-primary); + border: 1px solid var(--border-color); +} + +.btn-secondary:hover:not(:disabled) { + background: var(--bg-secondary); +} + +.btn-danger { + background: #dc3545; + color: white; +} + +.btn-danger:hover:not(:disabled) { + background: #c82333; +} + +.btn-sm { + padding: 0.5em 1em; + font-size: 0.8em; +} + +.token-generation-result { + margin-bottom: 2em; +} + +.token-input-wrap { + display: flex; + align-items: center; + gap: 0.5em; + width: 100%; +} + +.token-input { + flex: 1 1 700px; /* allow input to grow, prefer large width */ + min-width: 300px; + padding: 0.6em 0.8em; + border: 1px solid var(--border-color); + border-radius: 6px; + color: var(--text-primary); + background: var(--falkor-quaternary); + overflow-x: auto; +} + +/* Styles for incremental database connection steps shown in the connect modal */ +#db-connection-steps { + margin: 16px 24px; + font-size: 14px; + color: var(--text-primary); +} + +#db-connection-steps-list { + list-style: none; + padding: 0; + margin: 0; + max-height: 220px; + overflow: auto; +} + +.db-connection-step { + display: flex; + align-items: center; + margin: 8px 0; +} + +.db-connection-step .step-icon { + 
display: inline-flex; + width: 20px; + height: 20px; + margin-right: 8px; + border-radius: 50%; + align-items: center; + justify-content: center; + font-size: 12px; + line-height: 20px; +} + +.db-connection-step .step-icon.pending { color: #1f6feb; } +.db-connection-step .step-icon.success { color: #16a34a; } +.db-connection-step .step-icon.error { color: #dc2626; } + +.alert { + padding: 1.5em; + border-radius: 6px; + margin-bottom: 1em; +} + +.alert-success { + background: rgba(40, 167, 69, 0.1); + border: 1px solid rgba(40, 167, 69, 0.3); + color: var(--text-primary); +} + +.alert h4 { + margin: 0 0 0.5em 0; + color: var(--text-primary); + font-size: 1.1em; +} + +.alert p { + margin: 0 0 1em 0; + color: var(--text-secondary); + line-height: 1.5; +} + +.token-display { + display: flex; + gap: 0.5em; + align-items: center; +} + +.token-input { + flex: 1; + padding: 0.8em; + font-size: 0.9em; + border: 1px solid var(--border-color); + border-radius: 6px; + background: var(--falkor-quaternary); + color: var(--text-primary); + word-break: break-all; +} + +.tokens-list h3 { + margin: 0 0 1.5em 0; + color: var(--text-primary); + font-size: 1.2em; + border-bottom: 1px solid var(--border-color); + padding-bottom: 0.5em; +} + +.no-tokens { + color: var(--text-secondary); + font-style: italic; + text-align: center; + padding: 2em; +} + +.tokens-table { + width: 100%; + border-collapse: collapse; + margin-top: 1em; +} + +.tokens-table th, +.tokens-table td { + padding: 1em; + text-align: left; + border-bottom: 1px solid var(--border-color); +} + +.tokens-table th { + background: var(--bg-tertiary); + color: var(--text-primary); + font-weight: 600; + font-size: 0.9em; +} + +.tokens-table td { + color: var(--text-secondary); +} + +.tokens-table tbody tr:hover { + background: var(--bg-tertiary); +} + +.modal-actions { + display: flex; + gap: 1em; + justify-content: flex-end; + margin-top: 2em; +} diff --git a/app/public/css/responsive.css b/app/public/css/responsive.css index 
57f16e7c..fa729f56 100644 --- a/app/public/css/responsive.css +++ b/app/public/css/responsive.css @@ -2,6 +2,7 @@ /* Layout Responsive */ @media (max-width: 768px) { + /* When left toolbar is open, push content to make room */ body.left-toolbar-open .chat-container { max-width: calc(100vw - 48px); @@ -42,13 +43,14 @@ /* Ensure chat header elements are properly positioned when toolbar is open */ body.left-toolbar-open .chat-header { - padding-left: 15px; /* Add extra padding to prevent overlap */ + padding-left: 15px; + /* Add extra padding to prevent overlap */ width: 100%; box-sizing: border-box; } /* Ensure dropdown and buttons in header don't get cut off */ - body.left-toolbar-open .chat-header > * { + body.left-toolbar-open .chat-header>* { margin-left: 0; width: 100%; } @@ -84,15 +86,15 @@ padding: 10px 12px; font-size: 14px; } - + .chat-header h1 { font-size: 18px; } - + #message-input { font-size: 16px !important; } - + #message-input::placeholder { font-size: 16px !important; } @@ -110,14 +112,13 @@ flex-wrap: nowrap; align-items: stretch; } - + /* Hide vertical separators on mobile */ .vertical-separator { display: none; } - + /* Make selectors and buttons fit screen width */ - #graph-select, #custom-file-upload { flex: 1; min-width: 0; @@ -129,11 +130,7 @@ height: 40px; box-sizing: border-box; } - - #graph-select { - max-width: 30%; - } - + #custom-file-upload { max-width: 35%; text-align: center; @@ -142,17 +139,12 @@ justify-content: center; cursor: pointer; } - - .dropdown-container { - flex: 1; - max-width: 35%; - } - - .custom-dropdown { + + .header-button { width: 100%; height: 40px; } - + .dropdown-selected { padding: 8px 6px; font-size: 13px; @@ -162,14 +154,14 @@ align-items: center; justify-content: space-between; } - + .dropdown-text { text-overflow: ellipsis; white-space: nowrap; overflow: hidden; flex: 1; } - + .dropdown-arrow { margin-left: 4px; flex-shrink: 0; @@ -181,18 +173,17 @@ padding: 0; gap: 5px; } - - #graph-select, + 
#custom-file-upload { padding: 6px 4px; font-size: 12px; height: 36px; } - - .custom-dropdown { + + .header-button { height: 36px; } - + .dropdown-selected { padding: 6px 4px; font-size: 12px; @@ -216,7 +207,7 @@ transform: none; margin-left: 10px; } - + #reset-button svg { width: 18px; height: 18px; @@ -234,13 +225,16 @@ /* Reduce padding on mobile to maximize input space */ padding: 8px; gap: 4px; - min-width: 0; /* Allow container to shrink */ + min-width: 0; + /* Allow container to shrink */ flex-shrink: 1; } #message-input { - font-size: 16px !important; /* Prevent zoom on iOS */ - min-width: 0; /* Allow input to shrink */ + font-size: 16px !important; + /* Prevent zoom on iOS */ + min-width: 0; + /* Allow input to shrink */ } #message-input::placeholder { @@ -251,7 +245,8 @@ .input-button { width: 40px; height: 40px; - flex-shrink: 0; /* Prevent buttons from shrinking */ + flex-shrink: 0; + /* Prevent buttons from shrinking */ } } @@ -271,7 +266,7 @@ width: 40px; height: 40px; } - + #github-link-btn { top: 15px; right: 60px; @@ -279,7 +274,7 @@ font-size: 12px; height: 40px; } - + .theme-icon { width: 18px; height: 18px; @@ -311,4 +306,4 @@ min-width: 90vw; margin: 0 5vw; } -} +} \ No newline at end of file diff --git a/app/public/css/themes.css b/app/public/css/themes.css index 51b79b4c..7683eb34 100644 --- a/app/public/css/themes.css +++ b/app/public/css/themes.css @@ -1,9 +1,5 @@ /* Theme-specific Styles and Overrides */ -[data-theme="light"] .logo { - filter: invert(1); -} - /* Theme icon states */ [data-theme="dark"] .theme-icon .sun, [data-theme="system"] .theme-icon .sun { diff --git a/app/public/icons/queryweaver.webp b/app/public/icons/queryweaver.webp index 64378466..cbdeafc1 100644 Binary files a/app/public/icons/queryweaver.webp and b/app/public/icons/queryweaver.webp differ diff --git a/app/templates/base.j2 b/app/templates/base.j2 index b9a7e36c..4366c641 100644 --- a/app/templates/base.j2 +++ b/app/templates/base.j2 @@ -17,7 +17,7 @@ {% endif 
%} - + {% if google_tag_manager_id %} diff --git a/app/templates/chat.j2 b/app/templates/chat.j2 index 60d7524c..637aa435 100644 --- a/app/templates/chat.j2 +++ b/app/templates/chat.j2 @@ -28,6 +28,9 @@ {% include 'components/login_modal.j2' %} {% include 'components/database_modal.j2' %} {% include 'components/reset_modal.j2' %} + {% if is_authenticated %} + {% include 'components/token_modal.j2' %} + {% endif %} {% endblock %} {% block scripts %} diff --git a/app/templates/components/chat_header.j2 b/app/templates/components/chat_header.j2 index 6bad7b49..345b6cc3 100644 --- a/app/templates/components/chat_header.j2 +++ b/app/templates/components/chat_header.j2 @@ -3,10 +3,15 @@

Natural Language to SQL Generator

- +
- -
+
\ No newline at end of file diff --git a/app/templates/components/chat_input.j2 b/app/templates/components/chat_input.j2 index ac50e12e..b7fa6ea4 100644 --- a/app/templates/components/chat_input.j2 +++ b/app/templates/components/chat_input.j2 @@ -10,8 +10,9 @@ - + \ No newline at end of file diff --git a/app/templates/components/database_modal.j2 b/app/templates/components/database_modal.j2 index 7795401a..ba7c383a 100644 --- a/app/templates/components/database_modal.j2 +++ b/app/templates/components/database_modal.j2 @@ -1,8 +1,38 @@ {# Database connection modal #} -
+
-

Connect to Database

+

Connect to Database

+ + +
+ + + +
+ +
+ + +
+
    +
    +
    - + diff --git a/app/templates/components/login_modal.j2 b/app/templates/components/login_modal.j2 index d0afeebb..cb67acf9 100644 --- a/app/templates/components/login_modal.j2 +++ b/app/templates/components/login_modal.j2 @@ -1,13 +1,110 @@ {# Login modal for authentication #} -