diff --git a/.gemini/commands/sc.help.toml b/.gemini/commands/sc.help.toml index 2feeb5d..e3a9859 100644 --- a/.gemini/commands/sc.help.toml +++ b/.gemini/commands/sc.help.toml @@ -3,7 +3,7 @@ # SPDX-License-Identifier: MIT description = "List all available /sc commands and their functionality" -prompt = """ +prompt = ''' --- name: help description: "List all available /sc commands and their functionality" @@ -96,7 +96,7 @@ SuperClaude supports behavioral flags to enable specific execution modes and too | Flag | Trigger | Behavior | |------|---------|----------| -| `--delegate [auto\|files\|folders]` | >7 directories OR >50 files | Enable sub-agent parallel processing with intelligent routing | +| `--delegate [auto\\|files\\|folders]` | >7 directories OR >50 files | Enable sub-agent parallel processing with intelligent routing | | `--concurrency [n]` | Resource optimization needs | Control max concurrent operations (range: 1-15) | | `--loop` | Improvement keywords (polish, refine, enhance) | Enable iterative improvement cycles with validation gates | | `--iterations [n]` | Specific improvement cycle requirements | Set improvement cycle count (range: 1-10) | @@ -153,4 +153,4 @@ SuperClaude supports behavioral flags to enable specific execution modes and too **Note:** This list is manually generated and may become outdated. If you suspect it is inaccurate, please consider regenerating it or contacting a maintainer. 
-""" +''' diff --git a/.gitconfig b/.gitconfig new file mode 100644 index 0000000..6d874da --- /dev/null +++ b/.gitconfig @@ -0,0 +1,14 @@ + +# repo-level .gitconfig to setup git drivers for codeweaver +[core] + attributesfile = .gitattributes + excludesfile = .gitignore + autocrlf = false + symlinks = true + +[alias] + root = "rev-parse --show-toplevel" + +[merge "ours"] + name = "Keep ours merge" + driver = true diff --git a/.gitconfig.license b/.gitconfig.license new file mode 100644 index 0000000..3a9c57d --- /dev/null +++ b/.gitconfig.license @@ -0,0 +1,4 @@ +SPDX-FileCopyrightText: 2025 Knitli Inc. +SPDX-FileContributor: Adam Poulemanos + +SPDX-License-Identifier: MIT OR Apache-2.0 diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 2439d3b..2a08f2d 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -6,7 +6,6 @@ paths: .github/workflows/cla.yml: ignore: - - 'property "is_member" is not defined in object type {}' - - > + - property "is_member" is not defined in object type {} + - | "github.event.pull_request.title" is potentially untrusted. avoid using it directly in inline scripts. instead, pass it through an environment variable. 
see https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions for more details - diff --git a/.github/dependabot.yml b/.github/dependabot.yml index fd2516a..ecbb324 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -10,19 +10,19 @@ updates: - version-update:semver-minor - version-update:semver-patch versioning-strategy: increase - directory: "/" + directory: / schedule: interval: daily time: "23:00" open-pull-requests-limit: 10 commit-message: - prefix: "deps" + prefix: deps assignees: - - codegen-sh[bot] + - copilot[bot] reviewers: - - codegen-sh[bot] + - copilot[bot] labels: - - "dependencies" + - dependencies rebase-strategy: auto - package-ecosystem: github-actions dependency-type: all @@ -31,17 +31,17 @@ updates: - version-update:semver-minor - version-update:semver-patch versioning-strategy: increase - directory: "/.github/workflows" + directory: /.github/workflows schedule: interval: daily time: "23:00" open-pull-requests-limit: 10 commit-message: - prefix: "deps" + prefix: deps assignees: - - codegen-sh[bot] + - copilot[bot] reviewers: - - codegen-sh[bot] + - copilot[bot] labels: - - "dependencies" + - dependencies rebase-strategy: auto diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebd3afb..d39d3b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,20 +5,24 @@ # ! GitHub Action to run the CI pipeline for Thread # ! 
Comprehensive CI with multi-platform testing, WASM builds, and security scanning name: CI - on: push: - branches: [main, develop, staging, "001-*"] + branches: + - main + - develop + - staging + - 001-* pull_request: - branches: [main, develop, staging] + branches: + - main + - develop + - staging workflow_dispatch: - env: RUST_BACKTRACE: 1 CARGO_TERM_COLOR: always CARGO_INCREMENTAL: 0 - RUSTFLAGS: "-D warnings" - + RUSTFLAGS: -D warnings jobs: # Quick formatting and linting checks that fail fast quick-checks: @@ -28,26 +32,23 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - + - run: | + sudo apt update && sudo apt install -y clang llvm-dev - name: Install Rust uses: dtolnay/rust-toolchain@stable with: + toolchain: nightly components: rustfmt, clippy - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - - name: Check formatting - run: cargo fmt --all -- --check - + run: cargo fmt --all -- --config-path ./rustfmt.toml --check - name: Run clippy run: cargo clippy --workspace --all-features --all-targets -- -D warnings - - name: Check typos uses: crate-ci/typos@v1.16.23 - # Test matrix for multiple platforms and Rust versions test: name: Test (${{ matrix.os }}, ${{ matrix.rust }}) @@ -55,8 +56,12 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - rust: [stable] + os: + - ubuntu-latest + - macos-latest + - windows-latest + rust: + - stable include: # Also test on beta and nightly on Linux - os: ubuntu-latest @@ -68,29 +73,29 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust ${{ matrix.rust }} uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.os }}-${{ matrix.rust }} cache-on-failure: true - - name: Install cargo-nextest uses: taiki-e/install-action@v2 with: tool: cargo-nextest - - name: Run tests (nextest) - run: cargo 
nextest run --all-features --no-fail-fast - + # thread-language's napi-* features conflict with tree-sitter-parsing at runtime; + # run it separately with only the compatible feature set. + run: | + cargo nextest run --workspace --exclude thread-language --all-features --no-fail-fast + cargo nextest run -p thread-language --features all-parsers,matching --no-fail-fast - name: Run doc tests - run: cargo test --doc --all-features - + run: | + cargo test --doc --workspace --exclude thread-language --all-features + cargo test --doc -p thread-language --features all-parsers,matching # Build and test WASM target for Edge deployment wasm: name: WASM Build & Test @@ -100,26 +105,20 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable with: targets: wasm32-unknown-unknown - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - - name: Install wasm-pack uses: jetli/wasm-pack-action@v0.4.0 - - name: Build WASM (dev) run: cargo run -p xtask build-wasm - - name: Build WASM (release) run: cargo run -p xtask build-wasm --release - - name: Upload WASM artifacts uses: actions/upload-artifact@v4 with: @@ -129,7 +128,6 @@ jobs: thread_wasm.js thread_wasm.d.ts retention-days: 7 - # Performance benchmarks (only on main branch or manual trigger) benchmark: name: Benchmarks @@ -140,25 +138,20 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - - name: Run benchmarks run: cargo bench --workspace --no-fail-fast -- --output-format bencher | tee benchmark-results.txt - - name: Upload benchmark results uses: actions/upload-artifact@v4 with: name: benchmark-results-${{ github.sha }} path: benchmark-results.txt retention-days: 30 - # Security audit with cargo-audit security_audit: name: Security Audit @@ -168,18 +161,14 @@ 
jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 - - name: Run cargo-audit uses: rustsec/audit-check@v1.4.1 with: token: ${{ secrets.GITHUB_TOKEN }} - # License compliance check with REUSE license: name: License Compliance @@ -187,10 +176,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: REUSE Compliance Check uses: fsfe/reuse-action@v2 - # Code coverage (only on main or PRs to main) coverage: name: Code Coverage @@ -201,30 +188,27 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable with: components: llvm-tools-preview - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 - - name: Install cargo-llvm-cov uses: taiki-e/install-action@v2 with: tool: cargo-llvm-cov - - name: Generate coverage - run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info - + run: | + cargo llvm-cov --no-report --workspace --exclude thread-language --all-features + cargo llvm-cov --no-report -p thread-language --features all-parsers,matching + cargo llvm-cov --no-run --lcov --output-path lcov.info - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: files: lcov.info fail_ci_if_error: false token: ${{ secrets.CODECOV_TOKEN }} - # Integration tests with Postgres (only on main or manual) integration: name: Integration Tests @@ -238,34 +222,25 @@ jobs: POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres POSTGRES_DB: thread_test - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 ports: - 5432:5432 steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable - - name: Cache Rust dependencies uses: 
Swatinem/rust-cache@v2 - - name: Install cargo-nextest uses: taiki-e/install-action@v2 with: tool: cargo-nextest - - name: Run integration tests env: DATABASE_URL: postgresql://postgres:postgres@localhost:5432/thread_test run: cargo nextest run --all-features --test integration_tests --test d1_integration_test - # Performance regression tests (on PRs and main) performance_regression: name: Performance Regression Tests @@ -276,34 +251,28 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - - name: Install cargo-nextest uses: taiki-e/install-action@v2 with: tool: cargo-nextest - - name: Run performance regression tests run: | cargo nextest run --manifest-path crates/flow/Cargo.toml \ --all-features \ --test performance_regression_tests \ -- --nocapture - - name: Check for regressions if: failure() run: | echo "⚠️ Performance regression detected!" echo "Review test output above for specific failures." exit 1 - # Load testing benchmarks (manual trigger or main branch only) load_testing: name: Load Testing Benchmarks @@ -314,46 +283,37 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install Rust uses: dtolnay/rust-toolchain@stable - - name: Cache Rust dependencies uses: Swatinem/rust-cache@v2 with: cache-on-failure: true - - name: Run load test benchmarks run: | cargo bench --manifest-path crates/flow/Cargo.toml \ --bench load_test \ --all-features \ -- --output-format bencher | tee load-test-results.txt - - name: Upload load test results uses: actions/upload-artifact@v4 with: name: load-test-results-${{ github.sha }} path: load-test-results.txt retention-days: 90 - - name: Compare with baseline (if exists) continue-on-error: true - run: | - if [ -f .benchmark-baseline/load-test-baseline.txt ]; then - echo "πŸ“Š Comparing with baseline..." 
- # Simple diff for now - could enhance with criterion-compare - diff .benchmark-baseline/load-test-baseline.txt load-test-results.txt || true - else - echo "πŸ“ No baseline found - this will become the baseline" - mkdir -p .benchmark-baseline - cp load-test-results.txt .benchmark-baseline/load-test-baseline.txt - fi - + run: "if [ -f .benchmark-baseline/load-test-baseline.txt ]; then\n echo \"\U0001F4CA Comparing with baseline...\"\n # Simple diff for now - could enhance with criterion-compare\n diff .benchmark-baseline/load-test-baseline.txt load-test-results.txt || true\nelse\n echo \"\U0001F4DD No baseline found - this will become the baseline\"\n mkdir -p .benchmark-baseline\n cp load-test-results.txt .benchmark-baseline/load-test-baseline.txt\nfi\n" # Final success check - all required jobs must pass ci-success: name: CI Success - needs: [quick-checks, test, wasm, security_audit, license, performance_regression] + needs: + - quick-checks + - test + - wasm + - security_audit + - license + - performance_regression if: always() runs-on: ubuntu-latest steps: @@ -369,3 +329,340 @@ jobs: exit 1 fi echo "βœ… All required jobs passed!" +--- +# SPDX-FileCopyrightText: 2025 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +# ! GitHub Action for automated releases +# ! 
Builds and publishes releases for multiple platforms +name: Release +on: + push: + tags: + - v*.*.* + workflow_dispatch: + inputs: + version: + description: Version to release (e.g., 0.1.0) + required: true + type: string +env: + CARGO_TERM_COLOR: always + CARGO_INCREMENTAL: 0 +permissions: + contents: write + packages: write +jobs: + # Create GitHub release + create-release: + name: Create Release + runs-on: ubuntu-latest + outputs: + upload_url: ${{ steps.create_release.outputs.upload_url }} + version: ${{ steps.get_version.outputs.version }} + steps: + - run: | + sudo apt update && sudo apt install -y clang llvm-dev + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Get version + id: get_version + env: + INPUT_VERSION: ${{ github.event.inputs.version }} + REF_NAME: ${{ github.ref }} + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + VERSION="${INPUT_VERSION}" + else + VERSION="${REF_NAME#refs/tags/v}" + fi + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "Version: ${VERSION}" + - name: Generate changelog + id: changelog + env: + VERSION: ${{ steps.get_version.outputs.version }} + run: | + # Extract changelog for this version + if [ -f "CHANGELOG.md" ]; then + CHANGELOG="$(sed -n "/## \[${VERSION}\]/,/## \[/p" CHANGELOG.md | sed '$ d')" + else + CHANGELOG="Release ${VERSION}" + fi + { + echo "changelog<<EOF" + echo "${CHANGELOG}" + echo "EOF" + } >> "$GITHUB_OUTPUT" + - name: Create GitHub Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: v${{ steps.get_version.outputs.version }} + release_name: Release ${{ steps.get_version.outputs.version }} + body: ${{ steps.changelog.outputs.changelog }} + draft: false + prerelease: false + # Build CLI binaries for multiple platforms + build-cli: + name: Build CLI (${{ matrix.target }}) + needs: create-release + strategy: + fail-fast: false + matrix: + include: + # Linux x86_64 + - target: x86_64-unknown-linux-gnu + os: ubuntu-latest + cross: 
false + strip: true + # Linux x86_64 (musl for static linking) + - target: x86_64-unknown-linux-musl + os: ubuntu-latest + cross: true + strip: true + # Linux ARM64 + - target: aarch64-unknown-linux-gnu + os: ubuntu-latest + cross: true + strip: false + # macOS x86_64 + - target: x86_64-apple-darwin + os: macos-latest + cross: false + strip: true + # macOS ARM64 (Apple Silicon) + - target: aarch64-apple-darwin + os: macos-latest + cross: false + strip: true + # Windows x86_64 + - target: x86_64-pc-windows-msvc + os: windows-latest + cross: false + strip: false + ext: .exe + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} + - name: Install cross (if needed) + if: matrix.cross + run: cargo install cross --git https://github.com/cross-rs/cross + - name: Build release binary + env: + TARGET: ${{ matrix.target }} + USE_CROSS: ${{ matrix.cross }} + run: | + if [ "${USE_CROSS}" == "true" ]; then + cross build --release --target "${TARGET}" --features parallel,caching + else + cargo build --release --target "${TARGET}" --features parallel,caching + fi + shell: bash + - name: Strip binary (if applicable) + if: matrix.strip + env: + TARGET: ${{ matrix.target }} + EXT: ${{ matrix.ext }} + run: | + strip "target/${TARGET}/release/thread${EXT}" + shell: bash + - name: Create archive + id: archive + env: + VERSION: ${{ needs.create-release.outputs.version }} + TARGET: ${{ matrix.target }} + OS_TYPE: ${{ matrix.os }} + run: | + ARCHIVE_NAME="thread-${VERSION}-${TARGET}" + if [ "${OS_TYPE}" == "windows-latest" ]; then + 7z a "${ARCHIVE_NAME}.zip" "./target/${TARGET}/release/thread.exe" + echo "asset_path=${ARCHIVE_NAME}.zip" >> "$GITHUB_OUTPUT" + echo "asset_content_type=application/zip" >> "$GITHUB_OUTPUT" + else + tar czf 
"${ARCHIVE_NAME}.tar.gz" -C "target/${TARGET}/release" thread + echo "asset_path=${ARCHIVE_NAME}.tar.gz" >> "$GITHUB_OUTPUT" + echo "asset_content_type=application/gzip" >> "$GITHUB_OUTPUT" + fi + echo "asset_name=${ARCHIVE_NAME}" >> "$GITHUB_OUTPUT" + shell: bash + - name: Upload release asset + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-release.outputs.upload_url }} + asset_path: ${{ steps.archive.outputs.asset_path }} + asset_name: ${{ steps.archive.outputs.asset_name }}${{ matrix.os == 'windows-latest' && '.zip' || '.tar.gz' }} + asset_content_type: ${{ steps.archive.outputs.asset_content_type }} + # Build and publish WASM package + build-wasm: + name: Build & Publish WASM + needs: create-release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Install wasm-pack + uses: jetli/wasm-pack-action@v0.4.0 + - name: Build WASM package + run: cargo run -p xtask build-wasm --release + - name: Create WASM archive + env: + VERSION: ${{ needs.create-release.outputs.version }} + run: | + ARCHIVE_NAME="thread-wasm-${VERSION}" + tar czf "${ARCHIVE_NAME}.tar.gz" \ + thread_wasm_bg.wasm \ + thread_wasm.js \ + thread_wasm.d.ts \ + package.json + - name: Upload WASM archive + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VERSION: ${{ needs.create-release.outputs.version }} + with: + upload_url: ${{ needs.create-release.outputs.upload_url }} + asset_path: thread-wasm-${{ needs.create-release.outputs.version }}.tar.gz + asset_name: thread-wasm-${{ needs.create-release.outputs.version }}.tar.gz + asset_content_type: application/gzip + # Build Docker images + build-docker: + name: Build Docker Images + needs: create-release + runs-on: 
ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=semver,pattern={{version}},value=v${{ needs.create-release.outputs.version }} + type=semver,pattern={{major}}.{{minor}},value=v${{ needs.create-release.outputs.version }} + type=semver,pattern={{major}},value=v${{ needs.create-release.outputs.version }} + type=raw,value=latest + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + # Publish to crates.io (optional, requires CARGO_REGISTRY_TOKEN) + publish-crates: + name: Publish to crates.io + needs: + - create-release + - build-cli + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Publish to crates.io + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + run: | + # Publish in dependency order + cargo publish -p thread-utils --allow-dirty || echo "Package already published" + cargo publish -p thread-language --allow-dirty || echo "Package already published" + cargo publish -p thread-ast-engine --allow-dirty || echo "Package already published" + cargo publish -p thread-rule-engine --allow-dirty || echo "Package already published" + cargo publish -p thread-services --allow-dirty || 
echo "Package already published" + cargo publish -p thread-flow --allow-dirty || echo "Package already published" + cargo publish -p thread-wasm --allow-dirty || echo "Package already published" + # Deploy to Cloudflare Workers (Edge deployment) + deploy-edge: + name: Deploy to Cloudflare Edge + needs: + - create-release + - build-wasm + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + environment: + name: production-edge + url: https://thread.knitli.com + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Install wasm-pack + uses: jetli/wasm-pack-action@v0.4.0 + - name: Build WASM for Workers + run: cargo run -p xtask build-wasm --release + - name: Deploy to Cloudflare Workers + uses: cloudflare/wrangler-action@v3 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + command: deploy --env production + # Release notification + notify: + name: Release Notification + needs: + - create-release + - build-cli + - build-wasm + - build-docker + runs-on: ubuntu-latest + if: always() + steps: + - name: Check release status + env: + VERSION: ${{ needs.create-release.outputs.version }} + CLI_RESULT: ${{ needs.build-cli.result }} + WASM_RESULT: ${{ needs.build-wasm.result }} + DOCKER_RESULT: ${{ needs.build-docker.result }} + run: | + echo "Release v${VERSION} completed" + echo "CLI builds: ${CLI_RESULT}" + echo "WASM build: ${WASM_RESULT}" + echo "Docker build: ${DOCKER_RESULT}" diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 252fdc4..52d7104 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -4,177 +4,27 @@ # SPDX-License-Identifier: MIT OR Apache-2.0 # ! GitHub Action to check CLA signatures for Knitli repositories # ! 
This action is triggered on issue comments and pull request events. -name: "CLA Assistant" +name: CLA Assistant on: issue_comment: - types: [created] + types: + - created pull_request_target: - types: [opened, closed, synchronize] + types: + - opened + - closed + - synchronize +env: + CLA_ACCESS_TOKEN: ${{ secrets.GITHUB_TOKEN }} permissions: actions: write - contents: write # this can be 'read' if the signatures are in remote repository + contents: read pull-requests: write statuses: write jobs: - set-pr-title: - runs-on: ubuntu-latest - if: > - github.event_name == 'pull_request_target' - - steps: - - name: "Set PR Title" - env: - EV_TITLE: ${{ github.event.pull_request.title }} - EV_BODY: ${{ github.event.pull_request.body }} - run: | - echo "${EV_TITLE}" - set-issue-title: - runs-on: ubuntu-latest - if: > - github.event_name == 'issue_comment' - - steps: - - name: "Set Issue Title" - env: - EV_TITLE: ${{ toJson(github.event.issue.title) }} - EV_BODY: ${{ toJson(github.event.comment.body) }} - run: | - echo "${EV_TITLE}" - check-cla: - runs-on: ubuntu-latest - steps: - - name: "SetVariables" - run: | - # shellcheck disable=SC2296 - # This script sets up environment variables based on the GitHub event context. - echo "Setting up variables..." - repo="${{ github.repository }}" - if [[ $repo != knitli* ]]; then - echo "This action is only for Knitli repositories, exiting..." - echo "looks like we're in a forked repository, exiting..." 
- exit 0 - fi - actor="${{ github.actor }}" - echo "EV_ACTOR=$actor" >> "$GITHUB_ENV" - event="${{ github.event_name }}" - event="${event//_target/}" - event="${event//_comment/}" - if [[ $event == pull_request* ]]; then - author="${{ github.event.pull_request.user.login }}" - email="${{ github.event.pull_request.user.email }}" - { - echo "IS_PR=true"; - echo "IS_ISSUE=false"; - echo "EV_NUMBER=\"${{ github.event.pull_request.number }}\""; - echo "EV_AUTHOR=\"$author\""; - echo "EV_URL=\"${{ github.event.pull_request.html_url }}\""; - echo "EV_EMAIL=\"$email\""; - echo "IS_RECHECK=false"; - } >> "$GITHUB_ENV" - else - author="${{ github.event.issue.user.login }}" - email="${{ github.event.issue.user.email }}" - { - echo "IS_PR=false"; - echo "IS_ISSUE=true"; - echo "EV_NUMBER=\"${{ github.event.issue.number }}\""; - echo "EV_AUTHOR=\"$author\""; - echo "EV_URL=\"${{ github.event.issue.html_url }}\""; - echo "EV_EMAIL=\"$email\""; - } >> "$GITHUB_ENV" - if [[ "$EV_BODY" == 'recheck' || "$EV_BODY" == *'I read the contributors license agreement and I agree to it.'* ]]; then - echo "IS_RECHECK=true" >> "$GITHUB_ENV" - else - echo "IS_RECHECK=false" >> "$GITHUB_ENV" - fi - fi - # if it's a rerun of the action, then the author is the actor - if [[ -z $author ]] || [[ $author != "$actor" ]]; then - author="$actor" - if [[ -z $email ]]; then - email="${author}@users.noreply.github.com" - fi - echo "EV_AUTHOR=$author" >> "$GITHUB_ENV" - echo "EV_EMAIL=$email" >> "$GITHUB_ENV" - fi - response=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - "https://api.github.com/orgs/knitli/members/$author") - if [ "$response" == "204" ]; then - echo "is_member=true" >> "$GITHUB_OUTPUT" - echo "User $author is a member of Knitli." 
- echo "MEMBER=true" >> "$GITHUB_ENV" - else - if [[ $email == *@knit.li || $email == *@knitli.com || $author == bashandbone ]]; then - echo "MEMBER=true" >> "$GITHUB_ENV" - echo "User $author has a Knitli email or is its founder. Provided email: $email" - echo "is_member=true" >> "$GITHUB_OUTPUT" - else - echo "MEMBER=false" >> "$GITHUB_ENV" - echo "is_member=false" >> "$GITHUB_OUTPUT" - fi - fi - cla-assistant: - needs: check-cla - if: > - (needs.check-cla.outputs.is_member && needs.check-cla.outputs.is_member == 'false' && needs.check-cla.outputs.is_member != 'true') || needs.check-cla.outputs.is_member == '' - - runs-on: ubuntu-latest - steps: - - name: Debug - run: | - if [[ $DEBUG_ACTIONS == 'true' ]]; then - printenv - fi - - name: "CLA Assistant" - if: > - (env.IS_RECHECK && env.IS_PR) && (env.IS_RECHECK == 'true' || env.IS_PR == 'true') - - uses: contributor-assistant/github-action@v2.6.1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - path-to-signatures: 'cla.json' - path-to-document: 'https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md' - branch: 'staging' - allowlist: > - bashandbone,codegen-sh[bot],dependabot[bot],github-actions[bot],actions-user,changeset-bot,claude - - create-file-commit-message: 'Adding file for tracking CLA signatures' - signed-commit-message: > - $env.EV_AUTHOR signed πŸ–ŠοΈ the Thread 🧡 CLA in [$env.GITHUB_REPOSITORY # $env.EV_NUMBER]($env.EV_URL) - - custom-notsigned-prcomment: | - βœ‹πŸ›‘ Hey $env.EV_AUTHOR, - - ## Thanks for your contribution to Thread! - - ### You need to agree to the CLA first... πŸ–ŠοΈ - - Before we can accept your (awesome) contribution, **we need you to agree to our contributors license agreement (CLA)**. πŸ–ŠοΈ - - ### To agree to the CLA, please comment: - > I read the contributors license agreement and I agree to it. - Those words are important[^1], so please don't change them. 
πŸ˜‰ - - [^1]: Our bot needs those *exact* words to recognize that you agree to the CLA. If you want to add something else, please do so after those words. πŸ˜‰ - custom-pr-sign-comment: | - $env.EV_AUTHOR, agrees to the Thread CLA. - - $env.EV_AUTHOR acknowledges they read and agree to the [Thread contributors license agreement](https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md). - custom-allsigned-prcomment: | - ## πŸš€ GOOD TO GO. Everyone has agreed to the CLA. πŸ‘ - - ### Thanks for your contribution to Thread! 🧡 - Your contribution is now ready to be merged[^1]. πŸŽ‰ - - ### Maintainers: Ship this PR! πŸ“¦πŸš€ - - [^1]: If it passes the other CI checks, of course. πŸ˜‰ I'm just here for the legal stuff. - # UNUSED OPTIONS - #lock-pullrequest-aftermerge: false - if you don't want this bot to automatically lock the pull request after merging (default - true) - #use-dco-flag: true - If you are using DCO instead of CLA - #TODO: move the signatures to a remote repository - #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) - #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) - # PERSONAL_ACCESS_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + cla-check: + uses: knitli/.github/.github/workflows/cla-check.yml@main + with: + repo_name: thread + cla_document_url: https://github.com/knitli/thread/blob/main/CONTRIBUTORS_LICENSE_AGREEMENT.md + secrets: inherit diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index 38a0e67..029aad5 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -6,13 +6,19 @@ name: Claude Assistant on: issue_comment: - types: [created] + types: + - created pull_request_review_comment: - types: [created] + types: + - created issues: - types: [opened, assigned, 
labeled] + types: + - opened + - assigned + - labeled pull_request_review: - types: [submitted] + types: + - submitted permissions: actions: read checks: write @@ -24,17 +30,17 @@ jobs: claude-response: runs-on: ubuntu-latest steps: - - name: "PR Review" + - name: PR Review if: github.event_name == 'pull_request_review' uses: anthropics/claude-code-action@beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }} - trigger_phrase: "@claude" - mode: "agent" - assignee_trigger: "claude" - label_trigger: "claude" - base_branch: "staging" + trigger_phrase: '@claude' + mode: agent + assignee_trigger: claude + label_trigger: claude + base_branch: staging max_turns: "30" allowed_tools: &allowed_tools | mcp__context7__resolve-library-id @@ -93,18 +99,17 @@ jobs: If you identify issues, briefly describe them. Provide a recommended fix with example implementation. Keep your feedback focused, actionable, and concise. - - - name: "Issue Opened" + - name: Issue Opened if: github.event_name == 'issues' && github.event.action == 'opened' uses: anthropics/claude-code-action@beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }} - trigger_phrase: "@claude" - assignee_trigger: "claude" - mode: "agent" - label_trigger: "claude" - base_branch: "staging" + trigger_phrase: '@claude' + assignee_trigger: claude + mode: agent + label_trigger: claude + base_branch: staging max_turns: "30" allowed_tools: *allowed_tools mcp_config: *mcp_config @@ -117,18 +122,17 @@ jobs: - If the issue covers the same topic as an existing open or closed issue, recommend closing the issue and linking to the relevant PR or issue. - Identify potential fixes and briefly describe them with links to relevant code. - If it's a feature request, estimate the difficulty of implementing the feature and potential impact on existing functionality and API. 
- - - name: "PR Review Comment" + - name: PR Review Comment if: github.event_name == 'pull_request_review_comment' uses: anthropics/claude-code-action@beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }} - trigger_phrase: "@claude" - assignee_trigger: "claude" - label_trigger: "claude" - mode: "agent" - base_branch: "staging" + trigger_phrase: '@claude' + assignee_trigger: claude + label_trigger: claude + mode: agent + base_branch: staging max_turns: "30" allowed_tools: *allowed_tools mcp_config: *mcp_config @@ -140,20 +144,18 @@ jobs: - Consider possible security or performance effects. - Suggest improvements or alternatives where applicable. - If the changes are satisfactory and the code passes checks, approve the PR with a comment. - - - name: "Issue Assigned or Labeled Claude" - if: > - (github.event_name == 'issues' && github.event.action == 'assigned') || - (github.event_name == 'issues' && github.event.action == 'labeled' && github.event.label.name == 'claude') + - name: Issue Assigned or Labeled Claude + if: | + (github.event_name == 'issues' && github.event.action == 'assigned') || (github.event_name == 'issues' && github.event.action == 'labeled' && github.event.label.name == 'claude') uses: anthropics/claude-code-action@beta with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }} - trigger_phrase: "@claude" - assignee_trigger: "claude" - mode: "agent" - label_trigger: "claude" - base_branch: "staging" + trigger_phrase: '@claude' + assignee_trigger: claude + mode: agent + label_trigger: claude + base_branch: staging max_turns: "30" allowed_tools: *allowed_tools mcp_config: *mcp_config diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2d6e480..5847822 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,364 +5,332 @@ # ! GitHub Action for automated releases # ! 
Builds and publishes releases for multiple platforms name: Release - on: - push: - tags: - - "v*.*.*" - workflow_dispatch: - inputs: - version: - description: "Version to release (e.g., 0.1.0)" - required: true - type: string - + push: + tags: + - v*.*.* + workflow_dispatch: + inputs: + version: + description: Version to release (e.g., 0.1.0) + required: true + type: string env: - CARGO_TERM_COLOR: always - CARGO_INCREMENTAL: 0 - + CARGO_TERM_COLOR: always + CARGO_INCREMENTAL: 0 permissions: - contents: write - packages: write - + contents: write + packages: write jobs: - # Create GitHub release - create-release: - name: Create Release - runs-on: ubuntu-latest - outputs: - upload_url: ${{ steps.create_release.outputs.upload_url }} - version: ${{ steps.get_version.outputs.version }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get version - id: get_version - env: - INPUT_VERSION: ${{ github.event.inputs.version }} - REF_NAME: ${{ github.ref }} - run: | - if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then - VERSION="${INPUT_VERSION}" - else - VERSION="${REF_NAME#refs/tags/v}" - fi - echo "version=${VERSION}" >> "$GITHUB_OUTPUT" - echo "Version: ${VERSION}" - - - name: Generate changelog - id: changelog - env: - VERSION: ${{ steps.get_version.outputs.version }} - run: | - # Extract changelog for this version - if [ -f "CHANGELOG.md" ]; then - CHANGELOG="$(sed -n "/## \[${VERSION}\]/,/## \[/p" CHANGELOG.md | sed '$ d')" - else - CHANGELOG="Release ${VERSION}" - fi - echo "changelog<> "$GITHUB_OUTPUT" - echo "${CHANGELOG}" >> "$GITHUB_OUTPUT" - echo "EOF" >> "$GITHUB_OUTPUT" - - - name: Create GitHub Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: v${{ steps.get_version.outputs.version }} - release_name: Release ${{ steps.get_version.outputs.version }} - body: ${{ steps.changelog.outputs.changelog }} - draft: false - prerelease: false - - # 
Build CLI binaries for multiple platforms - build-cli: - name: Build CLI (${{ matrix.target }}) - needs: create-release - strategy: - fail-fast: false - matrix: - include: - # Linux x86_64 - - target: x86_64-unknown-linux-gnu - os: ubuntu-latest - cross: false - strip: true - - # Linux x86_64 (musl for static linking) - - target: x86_64-unknown-linux-musl - os: ubuntu-latest - cross: true - strip: true - - # Linux ARM64 - - target: aarch64-unknown-linux-gnu - os: ubuntu-latest - cross: true - strip: false - - # macOS x86_64 - - target: x86_64-apple-darwin - os: macos-latest - cross: false - strip: true - - # macOS ARM64 (Apple Silicon) - - target: aarch64-apple-darwin - os: macos-latest - cross: false - strip: true - - # Windows x86_64 - - target: x86_64-pc-windows-msvc - os: windows-latest - cross: false - strip: false - ext: .exe - - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - with: - targets: ${{ matrix.target }} - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - key: ${{ matrix.target }} - - - name: Install cross (if needed) - if: matrix.cross - run: cargo install cross --git https://github.com/cross-rs/cross - - - name: Build release binary - env: - TARGET: ${{ matrix.target }} - USE_CROSS: ${{ matrix.cross }} - run: | - if [ "${USE_CROSS}" == "true" ]; then - cross build --release --target "${TARGET}" --features parallel,caching - else - cargo build --release --target "${TARGET}" --features parallel,caching - fi - shell: bash - - - name: Strip binary (if applicable) - if: matrix.strip - env: - TARGET: ${{ matrix.target }} - EXT: ${{ matrix.ext }} - run: | - strip "target/${TARGET}/release/thread${EXT}" - shell: bash - - - name: Create archive - id: archive - env: - VERSION: ${{ needs.create-release.outputs.version }} - TARGET: ${{ matrix.target }} - OS_TYPE: ${{ matrix.os }} - run: | - 
ARCHIVE_NAME="thread-${VERSION}-${TARGET}" - if [ "${OS_TYPE}" == "windows-latest" ]; then - 7z a "${ARCHIVE_NAME}.zip" "./target/${TARGET}/release/thread.exe" - echo "asset_path=${ARCHIVE_NAME}.zip" >> "$GITHUB_OUTPUT" - echo "asset_content_type=application/zip" >> "$GITHUB_OUTPUT" - else - tar czf "${ARCHIVE_NAME}.tar.gz" -C "target/${TARGET}/release" thread - echo "asset_path=${ARCHIVE_NAME}.tar.gz" >> "$GITHUB_OUTPUT" - echo "asset_content_type=application/gzip" >> "$GITHUB_OUTPUT" - fi - echo "asset_name=${ARCHIVE_NAME}" >> "$GITHUB_OUTPUT" - shell: bash - - - name: Upload release asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ needs.create-release.outputs.upload_url }} - asset_path: ${{ steps.archive.outputs.asset_path }} - asset_name: ${{ steps.archive.outputs.asset_name }}${{ matrix.os == 'windows-latest' && '.zip' || '.tar.gz' }} - asset_content_type: ${{ steps.archive.outputs.asset_content_type }} - - # Build and publish WASM package - build-wasm: - name: Build & Publish WASM - needs: create-release - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - with: - targets: wasm32-unknown-unknown - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - - - name: Install wasm-pack - uses: jetli/wasm-pack-action@v0.4.0 - - - name: Build WASM package - run: cargo run -p xtask build-wasm --release - - - name: Create WASM archive - env: - VERSION: ${{ needs.create-release.outputs.version }} - run: | - ARCHIVE_NAME="thread-wasm-${VERSION}" - tar czf "${ARCHIVE_NAME}.tar.gz" \ - thread_wasm_bg.wasm \ - thread_wasm.js \ - thread_wasm.d.ts \ - package.json - - - name: Upload WASM archive - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - VERSION: ${{ needs.create-release.outputs.version }} - with: - upload_url: ${{ 
needs.create-release.outputs.upload_url }} - asset_path: thread-wasm-${{ needs.create-release.outputs.version }}.tar.gz - asset_name: thread-wasm-${{ needs.create-release.outputs.version }}.tar.gz - asset_content_type: application/gzip - - # Build Docker images - build-docker: - name: Build Docker Images - needs: create-release - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ghcr.io/${{ github.repository }} - tags: | - type=semver,pattern={{version}},value=v${{ needs.create-release.outputs.version }} - type=semver,pattern={{major}}.{{minor}},value=v${{ needs.create-release.outputs.version }} - type=semver,pattern={{major}},value=v${{ needs.create-release.outputs.version }} - type=raw,value=latest - - - name: Build and push - uses: docker/build-push-action@v5 - with: - context: . 
- platforms: linux/amd64,linux/arm64 - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - # Publish to crates.io (optional, requires CARGO_REGISTRY_TOKEN) - publish-crates: - name: Publish to crates.io - needs: [create-release, build-cli] - runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - - - name: Publish to crates.io - env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - run: | - # Publish in dependency order - cargo publish -p thread-utils --allow-dirty || echo "Package already published" - cargo publish -p thread-language --allow-dirty || echo "Package already published" - cargo publish -p thread-ast-engine --allow-dirty || echo "Package already published" - cargo publish -p thread-rule-engine --allow-dirty || echo "Package already published" - cargo publish -p thread-services --allow-dirty || echo "Package already published" - cargo publish -p thread-flow --allow-dirty || echo "Package already published" - cargo publish -p thread-wasm --allow-dirty || echo "Package already published" - - # Deploy to Cloudflare Workers (Edge deployment) - deploy-edge: - name: Deploy to Cloudflare Edge - needs: [create-release, build-wasm] - runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') - environment: - name: production-edge - url: https://thread.knit.li - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - with: - targets: wasm32-unknown-unknown - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - - - name: Install wasm-pack - uses: jetli/wasm-pack-action@v0.4.0 - - - 
name: Build WASM for Workers - run: cargo run -p xtask build-wasm --release - - - name: Deploy to Cloudflare Workers - uses: cloudflare/wrangler-action@v3 - with: - apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} - accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} - command: deploy --env production - - # Release notification - notify: - name: Release Notification - needs: [create-release, build-cli, build-wasm, build-docker] - runs-on: ubuntu-latest - if: always() - steps: - - name: Check release status - env: - VERSION: ${{ needs.create-release.outputs.version }} - CLI_RESULT: ${{ needs.build-cli.result }} - WASM_RESULT: ${{ needs.build-wasm.result }} - DOCKER_RESULT: ${{ needs.build-docker.result }} - run: | - echo "Release v${VERSION} completed" - echo "CLI builds: ${CLI_RESULT}" - echo "WASM build: ${WASM_RESULT}" - echo "Docker build: ${DOCKER_RESULT}" + # Create GitHub release + create-release: + name: Create Release + runs-on: ubuntu-latest + outputs: + upload_url: ${{ steps.create_release.outputs.upload_url }} + version: ${{ steps.get_version.outputs.version }} + steps: + - run: | + apt update && sudo apt install -y clang llvm-dev + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Get version + id: get_version + env: + INPUT_VERSION: ${{ github.event.inputs.version }} + REF_NAME: ${{ github.ref }} + run: | + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + VERSION="${INPUT_VERSION}" + else + VERSION="${REF_NAME#refs/tags/v}" + fi + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "Version: ${VERSION}" + - name: Generate changelog + id: changelog + env: + VERSION: ${{ steps.get_version.outputs.version }} + run: | + # Extract changelog for this version + if [ -f "CHANGELOG.md" ]; then + CHANGELOG="$(sed -n "/## \[${VERSION}\]/,/## \[/p" CHANGELOG.md | sed '$ d')" + else + CHANGELOG="Release ${VERSION}" + fi + { + echo "changelog<> "$GITHUB_OUTPUT" + - name: Create GitHub Release + id: create_release + uses: 
actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: v${{ steps.get_version.outputs.version }} + release_name: Release ${{ steps.get_version.outputs.version }} + body: ${{ steps.changelog.outputs.changelog }} + draft: false + prerelease: false + # Build CLI binaries for multiple platforms + build-cli: + name: Build CLI (${{ matrix.target }}) + needs: create-release + strategy: + fail-fast: false + matrix: + include: + # Linux x86_64 + - target: x86_64-unknown-linux-gnu + os: ubuntu-latest + cross: false + strip: true + # Linux x86_64 (musl for static linking) + - target: x86_64-unknown-linux-musl + os: ubuntu-latest + cross: true + strip: true + # Linux ARM64 + - target: aarch64-unknown-linux-gnu + os: ubuntu-latest + cross: true + strip: false + # macOS x86_64 + - target: x86_64-apple-darwin + os: macos-latest + cross: false + strip: true + # macOS ARM64 (Apple Silicon) + - target: aarch64-apple-darwin + os: macos-latest + cross: false + strip: true + # Windows x86_64 + - target: x86_64-pc-windows-msvc + os: windows-latest + cross: false + strip: false + ext: .exe + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.target }} + - name: Install cross (if needed) + if: matrix.cross + run: cargo install cross --git https://github.com/cross-rs/cross + - name: Build release binary + env: + TARGET: ${{ matrix.target }} + USE_CROSS: ${{ matrix.cross }} + run: | + if [ "${USE_CROSS}" == "true" ]; then + cross build --release --target "${TARGET}" --features parallel,caching + else + cargo build --release --target "${TARGET}" --features parallel,caching + fi + shell: bash + - name: Strip binary (if applicable) + if: matrix.strip + env: + TARGET: ${{ matrix.target }} + EXT: ${{ matrix.ext }} + run: 
| + strip "target/${TARGET}/release/thread${EXT}" + shell: bash + - name: Create archive + id: archive + env: + VERSION: ${{ needs.create-release.outputs.version }} + TARGET: ${{ matrix.target }} + OS_TYPE: ${{ matrix.os }} + run: | + ARCHIVE_NAME="thread-${VERSION}-${TARGET}" + if [ "${OS_TYPE}" == "windows-latest" ]; then + 7z a "${ARCHIVE_NAME}.zip" "./target/${TARGET}/release/thread.exe" + echo "asset_path=${ARCHIVE_NAME}.zip" >> "$GITHUB_OUTPUT" + echo "asset_content_type=application/zip" >> "$GITHUB_OUTPUT" + else + tar czf "${ARCHIVE_NAME}.tar.gz" -C "target/${TARGET}/release" thread + echo "asset_path=${ARCHIVE_NAME}.tar.gz" >> "$GITHUB_OUTPUT" + echo "asset_content_type=application/gzip" >> "$GITHUB_OUTPUT" + fi + echo "asset_name=${ARCHIVE_NAME}" >> "$GITHUB_OUTPUT" + shell: bash + - name: Upload release asset + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.create-release.outputs.upload_url }} + asset_path: ${{ steps.archive.outputs.asset_path }} + asset_name: ${{ steps.archive.outputs.asset_name }}${{ matrix.os == 'windows-latest' && '.zip' || '.tar.gz' }} + asset_content_type: ${{ steps.archive.outputs.asset_content_type }} + # Build and publish WASM package + build-wasm: + name: Build & Publish WASM + needs: create-release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Install wasm-pack + uses: jetli/wasm-pack-action@v0.4.0 + - name: Build WASM package + run: cargo run -p xtask build-wasm --release + - name: Create WASM archive + env: + VERSION: ${{ needs.create-release.outputs.version }} + run: | + ARCHIVE_NAME="thread-wasm-${VERSION}" + tar czf "${ARCHIVE_NAME}.tar.gz" \ + thread_wasm_bg.wasm \ + thread_wasm.js \ + thread_wasm.d.ts \ + package.json + - 
name: Upload WASM archive + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VERSION: ${{ needs.create-release.outputs.version }} + with: + upload_url: ${{ needs.create-release.outputs.upload_url }} + asset_path: thread-wasm-${{ needs.create-release.outputs.version }}.tar.gz + asset_name: thread-wasm-${{ needs.create-release.outputs.version }}.tar.gz + asset_content_type: application/gzip + # Build Docker images + build-docker: + name: Build Docker Images + needs: create-release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository }} + tags: | + type=semver,pattern={{version}},value=v${{ needs.create-release.outputs.version }} + type=semver,pattern={{major}}.{{minor}},value=v${{ needs.create-release.outputs.version }} + type=semver,pattern={{major}},value=v${{ needs.create-release.outputs.version }} + type=raw,value=latest + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + # Publish to crates.io (optional, requires CARGO_REGISTRY_TOKEN) + publish-crates: + name: Publish to crates.io + needs: + - create-release + - build-cli + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Publish to crates.io + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + run: | + # Publish in dependency order + cargo publish -p thread-utils --allow-dirty || echo "Package already published" + cargo publish -p thread-language --allow-dirty || echo "Package already published" + cargo publish -p thread-ast-engine --allow-dirty || echo "Package already published" + cargo publish -p thread-rule-engine --allow-dirty || echo "Package already published" + cargo publish -p thread-services --allow-dirty || echo "Package already published" + cargo publish -p thread-flow --allow-dirty || echo "Package already published" + cargo publish -p thread-wasm --allow-dirty || echo "Package already published" + # Deploy to Cloudflare Workers (Edge deployment) + deploy-edge: + name: Deploy to Cloudflare Edge + needs: + - create-release + - build-wasm + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + environment: + name: production-edge + url: https://thread.knitli.com + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-unknown-unknown + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Install wasm-pack + uses: jetli/wasm-pack-action@v0.4.0 + - name: 
Build WASM for Workers + run: cargo run -p xtask build-wasm --release + - name: Deploy to Cloudflare Workers + uses: cloudflare/wrangler-action@v3 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + command: deploy --env production + # Release notification + notify: + name: Release Notification + needs: + - create-release + - build-cli + - build-wasm + - build-docker + runs-on: ubuntu-latest + if: always() + steps: + - name: Check release status + env: + VERSION: ${{ needs.create-release.outputs.version }} + CLI_RESULT: ${{ needs.build-cli.result }} + WASM_RESULT: ${{ needs.build-wasm.result }} + DOCKER_RESULT: ${{ needs.build-docker.result }} + run: | + echo "Release v${VERSION} completed" + echo "CLI builds: ${CLI_RESULT}" + echo "WASM build: ${WASM_RESULT}" + echo "Docker build: ${DOCKER_RESULT}" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index f0e9e7a..332caa4 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -5,337 +5,237 @@ # ! GitHub Action for comprehensive security scanning # ! 
Runs on schedule, PRs, and manual triggers name: Security Audit - on: - # Run daily at 2 AM UTC - schedule: - - cron: "0 2 * * *" - - # Run on PRs to main - pull_request: - branches: [main] - paths: - - "Cargo.toml" - - "Cargo.lock" - - "**/Cargo.toml" - - # Run on push to main - push: - branches: [main] - paths: - - "Cargo.toml" - - "Cargo.lock" - - "**/Cargo.toml" - - # Manual trigger - workflow_dispatch: - + # Run daily at 2 AM UTC + schedule: + - cron: 0 2 * * * + # Run on PRs to main + pull_request: + branches: + - main + paths: + - Cargo.toml + - Cargo.lock + - '**/Cargo.toml' + # Run on push to main + push: + branches: + - main + paths: + - Cargo.toml + - Cargo.lock + - '**/Cargo.toml' + # Manual trigger + workflow_dispatch: env: - RUST_BACKTRACE: 1 - CARGO_TERM_COLOR: always - + RUST_BACKTRACE: 1 + CARGO_TERM_COLOR: always permissions: - contents: read - issues: write - security-events: write - + contents: read + issues: write + security-events: write jobs: - # Vulnerability scanning with cargo-audit - cargo-audit: - name: Cargo Audit - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - - - name: Install cargo-audit - run: cargo install cargo-audit --locked - - - name: Run cargo audit - id: audit - run: | - cargo audit --json > audit-results.json || true - cat audit-results.json - - - name: Parse audit results - id: parse - run: | - VULNERABILITIES="$(jq '.vulnerabilities.count' audit-results.json)" - echo "vulnerabilities=${VULNERABILITIES}" >> "$GITHUB_OUTPUT" - - if [ "${VULNERABILITIES}" -gt 0 ]; then - echo "::warning::Found ${VULNERABILITIES} vulnerabilities" - jq -r '.vulnerabilities.list[] | "::warning file=Cargo.toml,title=\(.advisory.id)::\(.advisory.title) in \(.package.name) \(.package.version)"' audit-results.json - fi - - - name: Upload audit results - uses: actions/upload-artifact@v4 - if: always() - 
with: - name: cargo-audit-results - path: audit-results.json - retention-days: 30 - - - name: Create issue for vulnerabilities - if: steps.parse.outputs.vulnerabilities != '0' && github.event_name == 'schedule' - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const audit = JSON.parse(fs.readFileSync('audit-results.json', 'utf8')); - - if (audit.vulnerabilities.count === 0) return; - - const vulns = audit.vulnerabilities.list.map(v => { - return `### ${v.advisory.id}: ${v.advisory.title} - - **Package**: \`${v.package.name}@${v.package.version}\` - **Severity**: ${v.advisory.metadata?.severity || 'Unknown'} - **URL**: ${v.advisory.url} - - ${v.advisory.description} - - **Patched Versions**: ${v.versions.patched.join(', ') || 'None'} - `; - }).join('\n\n---\n\n'); - - const title = `Security: ${audit.vulnerabilities.count} vulnerabilities found`; - const body = `## Security Audit Report - - **Date**: ${new Date().toISOString()} - **Vulnerabilities**: ${audit.vulnerabilities.count} - - ${vulns} - - --- - - This issue was automatically created by the security audit workflow.`; - - await github.rest.issues.create({ - owner: context.repo.owner, - repo: context.repo.repo, - title: title, - body: body, - labels: ['security', 'dependencies'] - }); - - # Dependency review for PRs - dependency-review: - name: Dependency Review - runs-on: ubuntu-latest - if: github.event_name == 'pull_request' - steps: - - uses: actions/checkout@v4 - - - name: Dependency Review - uses: actions/dependency-review-action@v4 - with: - fail-on-severity: moderate - deny-licenses: GPL-3.0, AGPL-3.0 - comment-summary-in-pr: always - - # SAST scanning with Semgrep - semgrep: - name: Semgrep SAST - runs-on: ubuntu-latest - if: github.event_name != 'schedule' - steps: - - uses: actions/checkout@v4 - - - name: Run Semgrep - uses: returntocorp/semgrep-action@v1 - with: - config: >- - p/rust - p/security-audit - p/secrets - - - name: Upload SARIF results - if: always() - 
uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: semgrep.sarif - - # License compliance scanning - license-check: - name: License Compliance - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Install cargo-license - run: cargo install cargo-license --locked - - - name: Check licenses - run: | - cargo license --json > licenses.json - - # Check for incompatible licenses - INCOMPATIBLE=$(jq -r '.[] | select(.license | contains("GPL-3.0") or contains("AGPL-3.0")) | .name' licenses.json) - - if [ -n "$INCOMPATIBLE" ]; then - echo "::error::Found incompatible licenses:" - echo "$INCOMPATIBLE" - exit 1 - fi - - - name: Upload license report - uses: actions/upload-artifact@v4 - if: always() - with: - name: license-report - path: licenses.json - retention-days: 30 - - # Supply chain security with cargo-deny - cargo-deny: - name: Cargo Deny - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Install cargo-deny - run: cargo install cargo-deny --locked - - - name: Check advisories - run: cargo deny check advisories - - - name: Check licenses - run: cargo deny check licenses - - - name: Check bans - run: cargo deny check bans - - - name: Check sources - run: cargo deny check sources - - # Outdated dependency check - outdated: - name: Outdated Dependencies - runs-on: ubuntu-latest - if: github.event_name == 'schedule' - steps: - - uses: actions/checkout@v4 - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - name: Install cargo-outdated - run: cargo install cargo-outdated --locked - - - name: Check for outdated dependencies - id: outdated - run: | - cargo outdated --format json > outdated.json || true - - OUTDATED_COUNT="$(jq '[.dependencies[] | select(.latest != .project)] | length' outdated.json)" - echo "outdated=${OUTDATED_COUNT}" >> "$GITHUB_OUTPUT" - - - name: Upload 
outdated report - uses: actions/upload-artifact@v4 - if: always() - with: - name: outdated-dependencies - path: outdated.json - retention-days: 30 - - - name: Create issue for outdated dependencies - if: steps.outdated.outputs.outdated != '0' - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const outdated = JSON.parse(fs.readFileSync('outdated.json', 'utf8')); - - const deps = outdated.dependencies - .filter(d => d.latest !== d.project) - .map(d => `- \`${d.name}\`: ${d.project} β†’ ${d.latest}`) - .join('\n'); - - if (!deps) return; - - const title = `Dependencies: ${outdated.dependencies.length} packages outdated`; - const body = `## Outdated Dependencies Report - - **Date**: ${new Date().toISOString()} - - The following dependencies have newer versions available: - - ${deps} - - --- - - This issue was automatically created by the security audit workflow. - Consider updating these dependencies and running tests.`; - - await github.rest.issues.create({ - owner: context.repo.owner, - repo: context.repo.repo, - title: title, - body: body, - labels: ['dependencies', 'maintenance'] - }); - - # Security policy validation - security-policy: - name: Security Policy Check - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Check SECURITY.md exists - run: | - if [ ! -f "SECURITY.md" ]; then - echo "::error::SECURITY.md file not found" - exit 1 - fi - - - name: Validate security policy - run: | - # Check for required sections - for section in "Supported Versions" "Reporting" "Disclosure"; do - if ! 
grep -qi "$section" SECURITY.md; then - echo "::warning::SECURITY.md missing section: $section" - fi - done - - # Summary report - security-summary: - name: Security Summary - needs: [cargo-audit, license-check, cargo-deny] - runs-on: ubuntu-latest + # Vulnerability scanning with cargo-audit + cargo-audit: + name: Cargo Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + - name: Install cargo-audit + run: cargo install cargo-audit --locked + - name: Run cargo audit + id: audit + run: | + cargo audit --json > audit-results.json || true + cat audit-results.json + - name: Parse audit results + id: parse + run: | + VULNERABILITIES="$(jq '.vulnerabilities.count' audit-results.json)" + echo "vulnerabilities=${VULNERABILITIES}" >> "$GITHUB_OUTPUT" + + if [ "${VULNERABILITIES}" -gt 0 ]; then + echo "::warning::Found ${VULNERABILITIES} vulnerabilities" + jq -r '.vulnerabilities.list[] | "::warning file=Cargo.toml,title=\(.advisory.id)::\(.advisory.title) in \(.package.name) \(.package.version)"' audit-results.json + fi + - name: Upload audit results + uses: actions/upload-artifact@v4 if: always() - steps: - - name: Generate summary - run: | - { - echo "## Security Audit Summary" - echo "" - echo "**Date**: \"$(date -u +"%Y-%m-%d %H:%M:%S UTC")\"" - echo "" - echo "### Job Results" - echo "" - echo "- Cargo Audit: ${{ needs.cargo-audit.result }}" - echo "- License Check: ${{ needs.license-check.result }}" - echo "- Cargo Deny: ${{ needs.cargo-deny.result }}" - echo "" - - if [ "${{ needs.cargo-audit.result }}" == "success" ] && \ - [ "${{ needs.license-check.result }}" == "success" ] && \ - [ "${{ needs.cargo-deny.result }}" == "success" ]; then - echo "βœ… **All security checks passed**" - else - echo "❌ **Some security checks failed**" - fi - } >> "$GITHUB_STEP_SUMMARY" + with: + name: cargo-audit-results + path: 
audit-results.json + retention-days: 30 + - name: Create issue for vulnerabilities + if: steps.parse.outputs.vulnerabilities != '0' && github.event_name == 'schedule' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const audit = JSON.parse(fs.readFileSync('audit-results.json', 'utf8')); + + if (audit.vulnerabilities.count === 0) return; + + const vulns = audit.vulnerabilities.list.map(v => { + return `### ${v.advisory.id}: ${v.advisory.title} + + **Package**: \`${v.package.name}@${v.package.version}\` + **Severity**: ${v.advisory.metadata?.severity || 'Unknown'} + **URL**: ${v.advisory.url} + + ${v.advisory.description} + + **Patched Versions**: ${v.versions.patched.join(', ') || 'None'} + `; + }).join('\n\n---\n\n'); + + const title = `Security: ${audit.vulnerabilities.count} vulnerabilities found`; + const body = `## Security Audit Report + + **Date**: ${new Date().toISOString()} + **Vulnerabilities**: ${audit.vulnerabilities.count} + + ${vulns} + + --- + + This issue was automatically created by the security audit workflow.`; + + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: title, + body: body, + labels: ['security', 'dependencies'] + }); + # Supply chain security with cargo-deny + cargo-deny: + name: Cargo Deny + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Install cargo-deny + run: cargo install cargo-deny --locked + - name: Check advisories + run: cargo deny check advisories + - name: Check licenses + run: cargo deny check licenses + - name: Check bans + run: cargo deny check bans + - name: Check sources + run: cargo deny check sources + # Outdated dependency check + outdated: + name: Outdated Dependencies + runs-on: ubuntu-latest + if: github.event_name == 'schedule' + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + - name: Install 
cargo-outdated + run: cargo install cargo-outdated --locked + - name: Check for outdated dependencies + id: outdated + run: | + cargo outdated --format json > outdated.json || true + + OUTDATED_COUNT="$(jq '[.dependencies[] | select(.latest != .project)] | length' outdated.json)" + echo "outdated=${OUTDATED_COUNT}" >> "$GITHUB_OUTPUT" + - name: Upload outdated report + uses: actions/upload-artifact@v4 + if: always() + with: + name: outdated-dependencies + path: outdated.json + retention-days: 30 + - name: Create issue for outdated dependencies + if: steps.outdated.outputs.outdated != '0' + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const outdated = JSON.parse(fs.readFileSync('outdated.json', 'utf8')); + + const deps = outdated.dependencies + .filter(d => d.latest !== d.project) + .map(d => `- \`${d.name}\`: ${d.project} β†’ ${d.latest}`) + .join('\n'); + + if (!deps) return; + + const title = `Dependencies: ${outdated.dependencies.length} packages outdated`; + const body = `## Outdated Dependencies Report + + **Date**: ${new Date().toISOString()} + + The following dependencies have newer versions available: + + ${deps} + + --- + + This issue was automatically created by the security audit workflow. + Consider updating these dependencies and running tests.`; + + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: title, + body: body, + labels: ['dependencies', 'maintenance'] + }); + # Security policy validation + security-policy: + name: Security Policy Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Check SECURITY.md exists + run: | + if [ ! -f "SECURITY.md" ]; then + echo "::error::SECURITY.md file not found" + exit 1 + fi + - name: Validate security policy + run: | + # Check for required sections + for section in "Supported Versions" "Reporting" "Disclosure"; do + if ! 
grep -qi "$section" SECURITY.md; then + echo "::warning::SECURITY.md missing section: $section" + fi + done + # Summary report + security-summary: + name: Security Summary + needs: + - cargo-audit + - cargo-deny + runs-on: ubuntu-latest + if: always() + steps: + - name: Generate summary + run: | + { + echo "## Security Audit Summary" + echo "" + echo "**Date**: \"$(date -u +"%Y-%m-%d %H:%M:%S UTC")\"" + echo "" + echo "### Job Results" + echo "" + echo "- Cargo Audit: ${{ needs.cargo-audit.result }}" + echo "- Cargo Deny: ${{ needs.cargo-deny.result }}" + echo "" + + if [ "${{ needs.cargo-audit.result }}" == "success" ] && \ + [ "${{ needs.cargo-deny.result }}" == "success" ]; then + echo "βœ… **All security checks passed**" + else + echo "❌ **Some security checks failed**" + fi + } >> "$GITHUB_STEP_SUMMARY" diff --git a/.vscode/settings.json b/.vscode/settings.json index f6ed3cd..c69d46c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,10 +2,10 @@ "git.enabled": true, "chat.mcp.serverSampling": { "allowedModels": { - "claude-sonnet-4.5": true, + "claude-sonnet-4.6": true, "claude-haiku-4.5": true, "gemini-3.0-flash": true, - "gpt-5.1-codex-max": true + "gpt-5.3-codex": true }, "autoApprove": { "allowedDuringChat": true, @@ -33,4 +33,7 @@ "scm.inputFontSize": 13, "debug.console.fontSize": 14, "terminal.integrated.fontSize": 14, + "rust-analyzer.files.exclude": [ + "crates/flow/tests/test_data/syntax_error.rs" + ] } diff --git a/CLAUDE.md b/CLAUDE.md index 458a57f..14118b7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,6 +19,7 @@ Thread is a **service-library dual architecture** for safe, fast, flexible code The project is forked from ast-grep and enhanced with ReCoco (Rust-only fork of CocoIndex) dataflow framework for production use as a code analysis engine for AI context generation. Thread's `thread-flow` crate serves as the implementation layer for ReCoco capabilities. 
**Key Differentiators**: + - βœ… **Content-Addressed Caching**: 50x+ performance gains on repeated analysis via automatic incremental updates - βœ… **Dual Deployment**: Single codebase compiles to both CLI (Rayon parallelism) and Edge (tokio async, Cloudflare Workers) - βœ… **Persistent Storage**: Native integration with Postgres (local), D1 (edge), Qdrant (vectors) @@ -70,13 +71,14 @@ Thread follows a **service-library dual architecture** (Constitution v2.0.0, Pri ### Feature Gating ReCoco uses Cargo feature flags to enable modular builds: + - Core dataflow primitives available without features - Storage backends gated behind `postgres`, `d1`, `qdrant` features - Thread selectively enables only required features via `thread-flow` ### Dependency Chain -``` +```plaintext thread-services β†’ thread-flow β†’ recoco (public crate) ↓ [Postgres | D1 | Qdrant] @@ -89,6 +91,7 @@ thread-services β†’ thread-flow β†’ recoco (public crate) ### Core Library (Open Source) The **D1 storage backend** is a first-class library feature in `crates/flow/src/incremental/backends/d1.rs`: + - βœ… Part of Thread's multi-backend storage abstraction - βœ… API documentation in `docs/api/D1_INTEGRATION_API.md` - βœ… Integration tests in `crates/flow/tests/incremental_d1_tests.rs` @@ -99,6 +102,7 @@ The **D1 storage backend** is a first-class library feature in `crates/flow/src/ ### Deployment Machinery (Segregated) **Cloudflare Workers deployment materials** are segregated in the **gitignored** `crates/cloudflare/` directory: + - πŸ”’ **Configuration**: `config/wrangler.production.toml.example` - Production Wrangler configuration - πŸ“š **Documentation**: `docs/EDGE_DEPLOYMENT.md` - Comprehensive deployment guide (17KB) - πŸš€ **Scripts**: `scripts/deploy.sh` - Automated deployment automation (5.9KB) @@ -332,6 +336,7 @@ cargo run -p xtask build-wasm --release ### Quality Gates (Constitutional Requirements) Before any PR merge, verify: + - βœ… `mise run lint` passes (zero warnings) - βœ… `cargo 
nextest run --all-features` passes (100% success) - βœ… `mise run ci` completes successfully @@ -379,8 +384,10 @@ The library provides multiple tools to help me AI assistants more efficient: - NEVER include explanatory text or markdown outside the JSON structure ## Active Technologies + - Rust (edition 2024, aligning with Thread's existing codebase) (001-realtime-code-graph) - Multi-backend architecture with deployment-specific primaries: (001-realtime-code-graph) ## Recent Changes + - 001-realtime-code-graph: Added Rust (edition 2024, aligning with Thread's existing codebase) diff --git a/Cargo.lock b/Cargo.lock index f37bfb8..3d3971f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,9 +105,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "arraydeque" @@ -127,6 +127,22 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "astral-tokio-tar" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec179a06c1769b1e42e1e2cbe74c7dcdb3d6383c838454d063eaac5bbb7ebbe5" +dependencies = [ + "filetime", + "futures-core", + "libc", + "portable-atomic", + "rustc-hash", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "async-lock" version = "3.4.2" @@ -194,9 +210,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-lc-rs" -version = "1.15.4" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256" +checksum = 
"d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" dependencies = [ "aws-lc-sys", "zeroize", @@ -204,9 +220,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.37.0" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -326,15 +342,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" dependencies = [ "serde_core", ] @@ -364,11 +374,14 @@ dependencies = [ [[package]] name = "bollard" -version = "0.18.1" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ccca1260af6a459d75994ad5acc1651bcabcbdbc41467cc9786519ab854c30" +checksum = "227aa051deec8d16bd9c34605e7aaf153f240e35483dd42f6f78903847934738" dependencies = [ + "async-stream", "base64 0.22.1", + "bitflags", + "bollard-buildkit-proto", "bollard-stubs", "bytes", "futures-core", @@ -383,33 +396,54 @@ dependencies = [ "hyper-util", "hyperlocal", "log", + "num", "pin-project-lite", + "rand 0.9.2", "rustls", "rustls-native-certs", - "rustls-pemfile", "rustls-pki-types", "serde", "serde_derive", "serde_json", - "serde_repr", "serde_urlencoded", - "thiserror 2.0.18", + "thiserror", + "time", "tokio", + "tokio-stream", "tokio-util", + "tonic", "tower-service", "url", "winapi", ] +[[package]] +name = 
"bollard-buildkit-proto" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a885520bf6249ab931a764ffdb87b0ceef48e6e7d807cfdb21b751e086e1ad" +dependencies = [ + "prost", + "prost-types", + "tonic", + "tonic-prost", + "ureq", +] + [[package]] name = "bollard-stubs" -version = "1.47.1-rc.27.3.1" +version = "1.52.1-rc.29.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f179cfbddb6e77a5472703d4b30436bff32929c0aa8a9008ecf23d1d3cdd0da" +checksum = "0f0a8ca8799131c1837d1282c3f81f31e76ceb0ce426e04a7fe1ccee3287c066" dependencies = [ + "base64 0.22.1", + "bollard-buildkit-proto", + "bytes", + "prost", "serde", + "serde_json", "serde_repr", - "serde_with", + "time", ] [[package]] @@ -424,9 +458,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "byteorder" @@ -436,9 +470,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" dependencies = [ "serde", ] @@ -451,9 +485,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.55" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", "jobserver", @@ -475,9 +509,9 @@ checksum 
= "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -516,18 +550,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.56" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75ca66430e33a14957acc24c5077b503e7d374151b2b4b3a10c83b4ceb4be0e" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.56" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793207c7fa6300a0608d1080b858e5fdbe713cdc1c8db9fb17777d8a13e63df0" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ "anstyle", "clap_lex", @@ -535,9 +569,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -657,65 +691,16 @@ checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" [[package]] name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot 0.5.0", - "is-terminal", - "itertools 0.10.5", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - 
"serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot 0.5.0", - "itertools 0.13.0", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ "alloca", "anes", "cast", "ciborium", "clap", - "criterion-plot 0.8.1", + "criterion-plot", "itertools 0.13.0", "num-traits", "oorandom", @@ -731,19 +716,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - -[[package]] -name = "criterion-plot" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", "itertools 0.13.0", @@ -891,9 +866,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", "serde_core", @@ -982,9 +957,9 @@ dependencies = [ [[package]] name = 
"env_filter" -version = "0.1.4" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" dependencies = [ "log", "regex", @@ -992,9 +967,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ "anstream", "anstyle", @@ -1030,6 +1005,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "etcetera" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de48cc4d1c1d97a20fd819def54b890cadde72ed3ad0c614822a0a433361be96" +dependencies = [ + "cfg-if", + "windows-sys 0.61.2", +] + [[package]] name = "event-listener" version = "5.4.1" @@ -1075,6 +1060,17 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "ferroid" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb330bbd4cb7a5b9f559427f06f98a4f853a137c8298f3bd3f8ca57663e21986" +dependencies = [ + "portable-atomic", + "rand 0.9.2", + "web-time", +] + [[package]] name = "filetime" version = "0.2.27" @@ -1153,9 +1149,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", 
@@ -1168,9 +1164,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -1178,15 +1174,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -1206,15 +1202,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", @@ -1223,21 +1219,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -1247,7 +1243,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -1288,6 +1283,19 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + [[package]] name = "globset" version = "0.4.18" @@ -1431,11 +1439,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1536,7 +1544,20 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots 1.0.5", + "webpki-roots 1.0.6", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", ] [[package]] @@ -1557,14 +1578,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -1701,6 +1721,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -1798,32 +1824,12 @@ dependencies = [ "serde", ] -[[package]] -name = "is-terminal" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.61.2", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -1850,9 +1856,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940" dependencies = [ "jiff-static", "log", @@ -1863,9 +1869,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818" dependencies = [ "proc-macro2", "quote", @@ -1884,9 +1890,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.85" +version = "0.3.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6" dependencies = [ "once_cell", "wasm-bindgen", @@ -1901,11 +1907,17 @@ dependencies = [ "spin", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libm" @@ -1913,15 +1925,25 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "libmimalloc-sys" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "libredox" version = "0.1.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags 2.10.0", + "bitflags", "libc", - "redox_syscall 0.7.0", + "redox_syscall 0.7.2", ] [[package]] @@ -1947,9 +1969,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -2005,25 +2027,15 @@ dependencies = [ [[package]] name = "md5" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" +checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0" [[package]] name = "memchr" -version = "2.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" - -[[package]] -name = "metrics" -version = "0.23.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5" -dependencies = [ - "ahash", - "portable-atomic", -] +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "metrics" @@ -2037,9 +2049,9 @@ dependencies = [ [[package]] name = "metrics-exporter-prometheus" -version = "0.16.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" +checksum = "3589659543c04c7dc5526ec858591015b87cd8746583b51b48ef4353f99dbcda" dependencies = [ "base64 0.22.1", "http-body-util", @@ -2048,30 +2060,40 @@ dependencies = [ "hyper-util", "indexmap 2.13.0", 
"ipnet", - "metrics 0.24.3", + "metrics", "metrics-util", "quanta", - "thiserror 1.0.69", + "rustls", + "thiserror", "tokio", "tracing", ] [[package]] name = "metrics-util" -version = "0.19.1" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8496cc523d1f94c1385dd8f0f0c2c480b2b8aeccb5b7e4485ad6365523ae376" +checksum = "cdfb1365fea27e6dd9dc1dbc19f570198bc86914533ad639dae939635f096be4" dependencies = [ "crossbeam-epoch", "crossbeam-utils", - "hashbrown 0.15.5", - "metrics 0.24.3", + "hashbrown 0.16.1", + "metrics", "quanta", "rand 0.9.2", "rand_xoshiro", "sketches-ddsketch", ] +[[package]] +name = "mimalloc" +version = "0.1.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "mime" version = "0.3.17" @@ -2121,17 +2143,17 @@ dependencies = [ [[package]] name = "native-tls" -version = "0.2.14" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" dependencies = [ "libc", "log", "openssl", - "openssl-probe 0.1.6", + "openssl-probe", "openssl-sys", "schannel", - "security-framework 2.11.1", + "security-framework", "security-framework-sys", "tempfile", ] @@ -2145,6 +2167,30 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "num-bigint-dig" version = "0.8.6" @@ -2161,11 +2207,20 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" [[package]] name = "num-integer" @@ -2187,6 +2242,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -2207,6 +2273,24 @@ dependencies = [ "libc", ] +[[package]] +name = "objc2-core-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" +dependencies = [ + "bitflags", +] + +[[package]] +name = "objc2-system-configuration" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7216bd11cbda54ccabcab84d523dc93b858ec75ecfb3a7d89513fa22464da396" +dependencies = [ + "objc2-core-foundation", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -2231,7 +2315,7 @@ version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags 
2.10.0", + "bitflags", "cfg-if", "foreign-types", "libc", @@ -2251,12 +2335,6 @@ dependencies = [ "syn", ] -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - [[package]] name = "openssl-probe" version = "0.2.1" @@ -2527,15 +2605,15 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" dependencies = [ "portable-atomic", ] @@ -2593,13 +2671,55 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ - "unicode-ident", + "prost", ] [[package]] @@ -2631,7 +2751,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror 2.0.18", + "thiserror", "tokio", "tracing", "web-time", @@ -2652,7 +2772,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.18", + "thiserror", "tinyvec", "tracing", "web-time", @@ -2757,9 +2877,9 @@ dependencies = [ [[package]] name = "rapidhash" -version = "4.2.1" +version = "4.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d8b5b858a440a0bc02625b62dd95131b9201aa9f69f411195dd4a7cfb1de3d7" +checksum = "b5e48930979c155e2f33aa36ab3119b5ee81332beb6482199a8ecd6029b80b59" dependencies = [ "rand 0.9.2", "rustversion", @@ -2771,7 +2891,7 @@ version = "11.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] @@ -2834,7 +2954,7 @@ dependencies = [ "phf 0.12.1", "recoco-utils", "rustls", - "schemars 1.2.0", + "schemars 1.2.1", "serde", "serde_json", "sqlx", @@ -2892,31 +3012,22 @@ dependencies = [ "yaml-rust2", ] -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] name = "redox_syscall" -version = "0.7.0" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" +checksum = "6d94dd2f7cd932d4dc02cc8b2b50dfd38bd079a4e5d79198b99743d7fcf9a4b4" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] @@ -2941,9 +3052,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -2953,9 +3064,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -2964,9 +3075,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -3009,7 +3120,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots 1.0.5", + "webpki-roots 1.0.6", ] [[package]] @@ -3052,7 +3163,7 @@ version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" dependencies = [ - "bitflags 2.10.0", + "bitflags", 
"fallible-iterator 0.3.0", "fallible-streaming-iterator", "hashlink 0.9.1", @@ -3068,11 +3179,11 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3081,9 +3192,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.36" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "aws-lc-rs", "log", @@ -3101,19 +3212,10 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe 0.2.1", + "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.5.1", -] - -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" -dependencies = [ - "rustls-pki-types", + "security-framework", ] [[package]] @@ -3146,9 +3248,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -3182,9 +3284,9 @@ dependencies = [ [[package]] name = 
"schemars" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -3195,9 +3297,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4908ad288c5035a8eb12cfdf0d49270def0a268ee162b75eeee0f85d155a7c45" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" dependencies = [ "proc-macro2", "quote", @@ -3213,39 +3315,32 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "2.11.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.9.4", + "bitflags", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", ] [[package]] -name = "security-framework" -version = "3.5.1" +name = "security-framework-sys" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.10.1", "core-foundation-sys", "libc", - "security-framework-sys", ] [[package]] -name = "security-framework-sys" -version = "2.15.0" +name = "semver" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" -dependencies = [ - 
"core-foundation-sys", - "libc", -] +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" @@ -3351,9 +3446,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.16.1" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa237f2807440d238e0364a218270b98f767a00d3dada77b1c53ae88940e2e7" +checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" dependencies = [ "base64 0.22.1", "chrono", @@ -3361,7 +3456,7 @@ dependencies = [ "indexmap 1.9.3", "indexmap 2.13.0", "schemars 0.9.0", - "schemars 1.2.0", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -3370,9 +3465,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.16.1" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52a8e3ca0ca629121f70ab50f95249e5a6f925cc0f6ffe8256c45b728875706c" +checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" dependencies = [ "darling", "proc-macro2", @@ -3476,9 +3571,9 @@ checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -3560,7 +3655,7 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "tracing", @@ -3615,7 +3710,7 @@ checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.10.0", + "bitflags", "byteorder", "bytes", "chrono", @@ -3645,7 +3740,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.18", + "thiserror", "tracing", "uuid", 
"whoami 1.6.1", @@ -3659,12 +3754,12 @@ checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.10.0", + "bitflags", "byteorder", "chrono", "crc", "dotenvy", - "etcetera", + "etcetera 0.8.0", "futures-channel", "futures-core", "futures-util", @@ -3684,7 +3779,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.18", + "thiserror", "tracing", "uuid", "whoami 1.6.1", @@ -3710,7 +3805,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror 2.0.18", + "thiserror", "tracing", "url", "uuid", @@ -3776,9 +3871,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -3807,11 +3902,11 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 2.10.0", + "bitflags", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -3834,12 +3929,12 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tempfile" -version = "3.24.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix", "windows-sys 
0.61.2", @@ -3847,18 +3942,21 @@ dependencies = [ [[package]] name = "testcontainers" -version = "0.23.3" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59a4f01f39bb10fc2a5ab23eb0d888b1e2bb168c157f61a1b98e6c501c639c74" +checksum = "c1c0624faaa317c56d6d19136580be889677259caf5c897941c6f446b4655068" dependencies = [ + "astral-tokio-tar", "async-trait", "bollard", - "bollard-stubs", "bytes", "docker_credential", "either", - "etcetera", + "etcetera 0.11.0", + "ferroid", "futures", + "http", + "itertools 0.14.0", "log", "memchr", "parse-display", @@ -3866,46 +3964,36 @@ dependencies = [ "serde", "serde_json", "serde_with", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", - "tokio-tar", "tokio-util", "url", ] [[package]] name = "testcontainers-modules" -version = "0.11.6" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d43ed4e8f58424c3a2c6c56dbea6643c3c23e8666a34df13c54f0a184e6c707" +checksum = "e5985fde5befe4ffa77a052e035e16c2da86e8bae301baa9f9904ad3c494d357" dependencies = [ "testcontainers", ] -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.18", + "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.69" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -3913,14 +4001,15 @@ 
dependencies = [ ] [[package]] -name = "thiserror-impl" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +name = "thread" +version = "0.0.1" dependencies = [ - "proc-macro2", - "quote", - "syn", + "thread-ast-engine", + "thread-flow", + "thread-language", + "thread-rule-engine", + "thread-services", + "thread-utils", ] [[package]] @@ -3929,9 +4018,9 @@ version = "0.1.0" dependencies = [ "bit-set", "cc", - "criterion 0.8.1", + "criterion", "regex", - "thiserror 2.0.18", + "thiserror", "thread-language", "thread-utils", "tree-sitter", @@ -3945,14 +4034,15 @@ dependencies = [ "async-trait", "base64 0.22.1", "bytes", - "criterion 0.5.1", + "criterion", "deadpool-postgres", "env_logger", "futures", "log", "md5", - "metrics 0.23.1", + "metrics", "metrics-exporter-prometheus", + "mimalloc", "moka", "rayon", "recoco", @@ -3963,7 +4053,7 @@ dependencies = [ "tempfile", "testcontainers", "testcontainers-modules", - "thiserror 2.0.18", + "thiserror", "thread-ast-engine", "thread-language", "thread-services", @@ -3982,7 +4072,7 @@ dependencies = [ "aho-corasick", "cc", "cfg-if", - "criterion 0.6.0", + "criterion", "ignore", "serde", "thread-ast-engine", @@ -3996,17 +4086,20 @@ dependencies = [ "tree-sitter-elixir", "tree-sitter-go", "tree-sitter-haskell", + "tree-sitter-hcl", "tree-sitter-html", "tree-sitter-java", "tree-sitter-javascript", "tree-sitter-json", "tree-sitter-kotlin-sg", "tree-sitter-lua", + "tree-sitter-nix", "tree-sitter-php", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", "tree-sitter-scala", + "tree-sitter-solidity", "tree-sitter-swift", "tree-sitter-typescript", "tree-sitter-yaml", @@ -4018,14 +4111,14 @@ version = "0.1.0" dependencies = [ "bit-set", "cc", - "criterion 0.6.0", + "criterion", "globset", "regex", - "schemars 1.2.0", + "schemars 1.2.1", "serde", "serde_json", "serde_yml", - "thiserror 2.0.18", + "thiserror", 
"thread-ast-engine", "thread-language", "thread-utils", @@ -4047,7 +4140,7 @@ dependencies = [ "pin-project", "recoco-utils", "serde", - "thiserror 2.0.18", + "thiserror", "thread-ast-engine", "thread-language", "thread-utils", @@ -4091,9 +4184,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.45" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", @@ -4106,15 +4199,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.25" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", @@ -4217,7 +4310,7 @@ dependencies = [ "socket2", "tokio", "tokio-util", - "whoami 2.1.0", + "whoami 2.1.1", ] [[package]] @@ -4241,21 +4334,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tar" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75" -dependencies = [ - "filetime", - "futures-core", - "libc", - "redox_syscall 0.3.5", - "tokio", - "tokio-stream", - "xattr", -] - [[package]] name = "tokio-util" version = "0.7.18" @@ -4270,6 +4348,46 @@ dependencies = [ "tokio", ] +[[package]] +name = "tonic" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" +dependencies = [ + "async-trait", + "axum", + "base64 0.22.1", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "socket2", + "sync_wrapper", + "tokio", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-prost" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.5.3" @@ -4278,9 +4396,12 @@ checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", + "indexmap 2.13.0", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -4292,7 +4413,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "bitflags", "bytes", "futures-util", "http", @@ -4448,9 +4569,9 @@ dependencies = [ [[package]] name = "tree-sitter-css" -version = "0.23.2" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ad6489794d41350d12a7fbe520e5199f688618f43aace5443980d1ddcf1b29e" +checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" dependencies = [ "cc", "tree-sitter-language", @@ -4468,9 +4589,9 @@ dependencies = [ [[package]] name = "tree-sitter-go" -version = "0.23.4" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" dependencies = [ 
"cc", "tree-sitter-language", @@ -4486,6 +4607,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-hcl" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a7b2cc3d7121553b84309fab9d11b3ff3d420403eef9ae50f9fd1cd9d9cf012" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-html" version = "0.23.2" @@ -4508,9 +4639,9 @@ dependencies = [ [[package]] name = "tree-sitter-javascript" -version = "0.23.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" dependencies = [ "cc", "tree-sitter-language", @@ -4538,15 +4669,25 @@ dependencies = [ [[package]] name = "tree-sitter-language" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" [[package]] name = "tree-sitter-lua" -version = "0.2.0" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea992f4164d83f371ef1239ae178c4d4596c296c09055e9a48bb02a2760403af" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb9adf0965fec58e7660cbb3a059dbb12ebeec9459e6dcbae3db004739641e" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" dependencies = [ "cc", "tree-sitter-language", @@ -4554,9 +4695,9 @@ dependencies = [ [[package]] name = "tree-sitter-php" -version = "0.23.11" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f066e94e9272cfe4f1dcb07a1c50c66097eca648f2d7233d299c8ae9ed8c130c" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" dependencies = [ "cc", "tree-sitter-language", @@ -4564,9 +4705,9 @@ dependencies = [ [[package]] name = "tree-sitter-python" -version = "0.23.6" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" dependencies = [ "cc", "tree-sitter-language", @@ -4602,6 +4743,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-swift" version = "0.7.1" @@ -4658,9 +4809,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization" @@ -4689,6 +4840,33 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" +dependencies = [ + "base64 0.22.1", + "log", + "percent-encoding", + "rustls", + "rustls-pki-types", + "ureq-proto", + "utf-8", +] + +[[package]] +name = "ureq-proto" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" +dependencies = [ + "base64 0.22.1", + "http", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.8" @@ -4708,6 +4886,12 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -4722,11 +4906,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.20.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.1", "js-sys", "serde_core", "wasm-bindgen", @@ -4786,9 +4970,18 @@ dependencies = [ [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] @@ -4810,9 +5003,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.108" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2" dependencies = [ "cfg-if", "once_cell", @@ -4823,9 +5016,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.58" +version = "0.4.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +checksum = "8a89f4650b770e4521aa6573724e2aed4704372151bd0de9d16a3bbabb87441a" dependencies = [ "cfg-if", "futures-util", @@ -4837,9 +5030,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.108" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4847,9 +5040,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.108" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +checksum = "ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60" dependencies = [ "bumpalo", "proc-macro2", @@ -4860,18 +5053,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.108" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.58" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45649196a53b0b7a15101d845d44d2dda7374fc1b5b5e2bbf58b7577ff4b346d" +checksum = 
"9e6fc7a6f61926fa909ee570d4ca194e264545ebbbb4ffd63ac07ba921bff447" dependencies = [ "async-trait", "cast", @@ -4891,9 +5084,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.58" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f579cdd0123ac74b94e1a4a72bd963cf30ebac343f2df347da0b8df24cdebed2" +checksum = "f745a117245c232859f203d6c8d52c72d4cfc42de7e668c147ca6b3e45f1157e" dependencies = [ "proc-macro2", "quote", @@ -4902,9 +5095,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-shared" -version = "0.2.108" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8145dd1593bf0fb137dbfa85b8be79ec560a447298955877804640e40c2d6ea" +checksum = "1f88e7ae201cc7c291da857532eb1c8712e89494e76ec3967b9805221388e938" + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.13.0", + "wasm-encoder", + "wasmparser", +] [[package]] name = "wasm_sync" @@ -4917,11 +5132,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap 2.13.0", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.85" +version = "0.3.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +checksum = 
"705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97" dependencies = [ "js-sys", "wasm-bindgen", @@ -4943,14 +5170,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.5", + "webpki-roots 1.0.6", ] [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -4967,11 +5194,13 @@ dependencies = [ [[package]] name = "whoami" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fae98cf96deed1b7572272dfc777713c249ae40aa1cf8862e091e8b745f5361" +checksum = "d6a5b12f9df4f978d2cfdb1bd3bac52433f44393342d7ee9c25f5a1c14c0f45d" dependencies = [ + "libc", "libredox", + "objc2-system-configuration", "wasite 1.0.2", "web-sys", ] @@ -5095,15 +5324,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-sys" version = "0.60.2" @@ -5310,9 +5530,91 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap 2.13.0", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap 2.13.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.13.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" @@ -5373,18 +5675,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.37" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +checksum = 
"db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.37" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", @@ -5453,6 +5755,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.18" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1966f8ac2c1f76987d69a74d0e0f929241c10e78136434e3be70ff7f58f64214" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 8ea3aaa..1e8cbdb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,18 +15,18 @@ members = [ "crates/language", "crates/rule-engine", "crates/services", + "crates/thread", "crates/utils", "crates/wasm", "xtask", - # crates/cloudflare (proprietary) ] [workspace.package] version = "0.0.1" edition = "2024" -rust-version = "1.85" +rust-version = "1.89" description = "A safe, fast, flexible code analysis and code parsing library and tool. Built with tree-sitter, ast-grep, and difftastic in Rust." 
-documentation = "https://thread.knitli.com" +documentation = "https://docs.rs/thread" readme = "README.md" homepage = "https://knitli.com" repository = "https://github.com/knitli/thread" @@ -59,6 +59,7 @@ include = [ aho-corasick = { version = "1.1.4" } bit-set = { version = "0.8.0" } memchr = { version = "2.7.6", features = ["std"] } +mimalloc = { version = "*", features = ["v3"] } rapidhash = { version = "4.2.1" } regex = { version = "1.12.2" } simdeez = { version = "2.0.0" } @@ -88,6 +89,7 @@ serde_yaml = { package = "serde_yml", version = "0.0.12" } thiserror = { version = "2.0.17" } # Thread thread-ast-engine = { path = "crates/ast-engine", default-features = false } +thread = { path = "crates/thread", default-features = false } thread-flow = { path = "crates/flow", default-features = false } thread-language = { path = "crates/language", default-features = false } thread-rule-engine = { path = "crates/rule-engine", default-features = false } diff --git a/crates/ast-engine/Cargo.toml b/crates/ast-engine/Cargo.toml index 927a017..3291719 100644 --- a/crates/ast-engine/Cargo.toml +++ b/crates/ast-engine/Cargo.toml @@ -34,7 +34,7 @@ thread-utils = { workspace = true, default-features = false, features = [ tree-sitter = { workspace = true, optional = true } [dev-dependencies] -criterion = { version = "0.8.0", features = ["html_reports"] } +criterion = { version = "0.8.2", features = ["html_reports"] } thread-language = { workspace = true, features = ["all-parsers"] } tree-sitter-typescript = "0.23.2" diff --git a/crates/ast-engine/src/matchers/pattern.rs b/crates/ast-engine/src/matchers/pattern.rs index 143f9c5..7d174ae 100644 --- a/crates/ast-engine/src/matchers/pattern.rs +++ b/crates/ast-engine/src/matchers/pattern.rs @@ -255,10 +255,10 @@ impl Matcher for Pattern { node: Node<'tree, D>, env: &mut Cow>, ) -> Option> { - if let Some(k) = self.root_kind { - if node.kind_id() != k { - return None; - } + if let Some(k) = self.root_kind + && node.kind_id() != k + { 
+ return None; } // do not pollute the env if pattern does not match let mut may_write = Cow::Borrowed(env.as_ref()); diff --git a/crates/ast-engine/src/meta_var.rs b/crates/ast-engine/src/meta_var.rs index 2260777..c742ad6 100644 --- a/crates/ast-engine/src/meta_var.rs +++ b/crates/ast-engine/src/meta_var.rs @@ -198,10 +198,10 @@ impl<'t, D: Doc> MetaVarEnv<'t, D> { ) -> bool { let mut env = Cow::Borrowed(self); for (var_id, candidate) in &self.single_matched { - if let Some(m) = var_matchers.get(var_id) { - if m.match_node_with_env(candidate.clone(), &mut env).is_none() { - return false; - } + if let Some(m) = var_matchers.get(var_id) + && m.match_node_with_env(candidate.clone(), &mut env).is_none() + { + return false; } } if let Cow::Owned(env) = env { diff --git a/crates/ast-engine/src/node.rs b/crates/ast-engine/src/node.rs index 0bdf520..dca19b2 100644 --- a/crates/ast-engine/src/node.rs +++ b/crates/ast-engine/src/node.rs @@ -443,10 +443,10 @@ impl<'r, D: Doc> Node<'r, D> { ) -> impl Iterator> + 's { let kinds = pat.potential_kinds(); self.dfs().filter_map(move |cand| { - if let Some(k) = &kinds { - if !k.contains(cand.kind_id().into()) { - return None; - } + if let Some(k) = &kinds + && !k.contains(cand.kind_id().into()) + { + return None; } pat.match_node(cand) }) diff --git a/crates/ast-engine/src/ops.rs b/crates/ast-engine/src/ops.rs index b3a412f..fbfb0c7 100644 --- a/crates/ast-engine/src/ops.rs +++ b/crates/ast-engine/src/ops.rs @@ -88,10 +88,10 @@ impl Matcher for All

{ node: Node<'tree, D>, env: &mut Cow>, ) -> Option> { - if let Some(kinds) = &self.kinds { - if !kinds.contains(node.kind_id().into()) { - return None; - } + if let Some(kinds) = &self.kinds + && !kinds.contains(node.kind_id().into()) + { + return None; } let mut new_env = Cow::Borrowed(env.as_ref()); let all_satisfied = self @@ -146,10 +146,10 @@ impl Matcher for Any { node: Node<'tree, D>, env: &mut Cow>, ) -> Option> { - if let Some(kinds) = &self.kinds { - if !kinds.contains(node.kind_id().into()) { - return None; - } + if let Some(kinds) = &self.kinds + && !kinds.contains(node.kind_id().into()) + { + return None; } let mut new_env = Cow::Borrowed(env.as_ref()); let found = self.patterns.iter().find_map(|p| { diff --git a/crates/flow/Cargo.toml b/crates/flow/Cargo.toml index a98732e..a68024f 100644 --- a/crates/flow/Cargo.toml +++ b/crates/flow/Cargo.toml @@ -49,9 +49,11 @@ base64 = "0.22" bytes = "1.10" deadpool-postgres = { version = "0.14", optional = true } env_logger = "0.11" +futures = { workspace = true } # Logging and observability log = "0.4" -metrics = "0.23" +metrics = "0.24" +mimalloc = { workspace = true, features = ["v3"] } # Optional: query result caching moka = { version = "0.12", features = ["future"], optional = true } # Optional: parallel processing for CLI (not available in workers) @@ -80,7 +82,7 @@ thread-services = { workspace = true, features = [ "ast-grep-backend", "serialization", ] } -thread-utils = { workspace = true } +thread-utils = { workspace = true, features = ["hashers"] } tokio = { workspace = true } # Optional: PostgreSQL storage backend for incremental updates tokio-postgres = { version = "0.7", optional = true } @@ -93,15 +95,14 @@ tracing-subscriber = { version = "0.3", features = [ tree-sitter = { workspace = true } [dev-dependencies] -criterion = "0.5" +criterion = "0.8.2" deadpool-postgres = "0.14" -futures = "0.3" -md5 = "0.7" -metrics-exporter-prometheus = "0.16" -rusqlite = { version = "0.32", features = ["bundled"] 
} +md5 = "0.8" +metrics-exporter-prometheus = "0.18" +rusqlite = { version = "*", features = ["bundled"] } tempfile = "3.13" -testcontainers = "0.23" -testcontainers-modules = { version = "0.11", features = ["postgres"] } +testcontainers = "*" +testcontainers-modules = { version = "*", features = ["postgres"] } tokio-postgres = "0.7" [features] diff --git a/crates/flow/benches/d1_profiling.rs b/crates/flow/benches/d1_profiling.rs index d4196cb..6309124 100644 --- a/crates/flow/benches/d1_profiling.rs +++ b/crates/flow/benches/d1_profiling.rs @@ -30,9 +30,10 @@ //! - Cache hit rate target: >90% //! - These benchmarks measure infrastructure overhead, not actual D1 API latency -use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use criterion::{Criterion, criterion_group, criterion_main}; use recoco::base::schema::{BasicValueType, EnrichedValueType, FieldSchema, ValueType}; use recoco::base::value::{BasicValue, FieldValues, KeyPart, KeyValue}; +use std::hint::black_box; use std::sync::Arc; use std::time::Duration; use thread_flow::monitoring::performance::PerformanceMetrics; @@ -42,11 +43,13 @@ use thread_flow::targets::d1::D1ExportContext; fn test_field_schema(name: &str, value_type: BasicValueType, nullable: bool) -> FieldSchema { FieldSchema::new( name, + // spellchecker:off EnrichedValueType { typ: ValueType::Basic(value_type), nullable, attrs: Default::default(), }, + // spellchecker:on ) } diff --git a/crates/flow/benches/fingerprint_benchmark.rs b/crates/flow/benches/fingerprint_benchmark.rs index e6c28a5..bc9c02f 100644 --- a/crates/flow/benches/fingerprint_benchmark.rs +++ b/crates/flow/benches/fingerprint_benchmark.rs @@ -15,8 +15,8 @@ //! - Full pipeline with 100% cache hit: <100Β΅s (50x+ speedup vs parse) //! 
- Memory overhead: <1KB per cached file -use criterion::{Criterion, Throughput, black_box, criterion_group, criterion_main}; -use std::collections::HashMap; +use criterion::{Criterion, Throughput, criterion_group, criterion_main}; +use std::hint::black_box; use thread_services::conversion::compute_content_fingerprint; // ============================================================================ @@ -120,7 +120,7 @@ fn benchmark_cache_lookups(c: &mut Criterion) { let mut group = c.benchmark_group("cache_lookups"); // Create cache with 1000 entries - let mut cache = HashMap::new(); + let mut cache = thread_utils::map_with_capacity(1000); for i in 0..1000 { let code = format!("fn test_{}() {{ println!(\"test\"); }}", i); let fp = compute_content_fingerprint(&code); @@ -182,7 +182,7 @@ fn benchmark_memory_usage(c: &mut Criterion) { // Measure memory overhead of cache group.bench_function("cache_1000_entries", |b| { b.iter(|| { - let mut cache = HashMap::new(); + let mut cache = thread_utils::map_with_capacity(1000); for i in 0..1000 { let code = format!("fn test_{}() {{}}", i); let fp = compute_content_fingerprint(&code); @@ -209,7 +209,7 @@ fn benchmark_cache_hit_rates(c: &mut Criterion) { // Scenario: 0% cache hit (all new files) group.bench_function("0_percent_hit_rate", |b| { b.iter(|| { - let mut cache = HashMap::new(); + let mut cache = thread_utils::get_map(); let mut hits = 0; let mut misses = 0; @@ -228,7 +228,7 @@ fn benchmark_cache_hit_rates(c: &mut Criterion) { }); // Scenario: 100% cache hit (all files seen before) - let mut primed_cache = HashMap::new(); + let mut primed_cache = thread_utils::get_map(); for file in &files { let fp = compute_content_fingerprint(file); primed_cache.insert(fp, ()); diff --git a/crates/flow/benches/incremental_benchmarks.rs b/crates/flow/benches/incremental_benchmarks.rs index cb8e299..0cc18aa 100644 --- a/crates/flow/benches/incremental_benchmarks.rs +++ b/crates/flow/benches/incremental_benchmarks.rs @@ -60,8 +60,8 @@ //! 
cargo bench -p thread-flow incremental_benchmarks -- cache_hit_rate //! ``` -use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main}; -use std::collections::{HashMap, HashSet}; +use ::std::hint::black_box; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use std::path::PathBuf; use thread_flow::incremental::{ AnalysisDefFingerprint, DependencyEdge, DependencyGraph, DependencyType, InMemoryStorage, @@ -254,8 +254,6 @@ fn benchmark_change_detection(c: &mut Criterion) { // Change detection latency let old_fp = AnalysisDefFingerprint::new(b"original content"); - let new_same = AnalysisDefFingerprint::new(b"original content"); - let new_diff = AnalysisDefFingerprint::new(b"modified content"); group.bench_function("detect_no_change", |b| { b.iter(|| black_box(old_fp.content_matches(black_box(b"original content")))); @@ -267,7 +265,8 @@ fn benchmark_change_detection(c: &mut Criterion) { // Graph traversal time (small) let graph = create_linear_chain(100); - let changed = HashSet::from([PathBuf::from("file_99.rs")]); + let changed: thread_utils::RapidSet = + [PathBuf::from("file_99.rs")].into_iter().collect(); group.bench_function("graph_traversal_100_nodes", |b| { b.iter(|| black_box(graph.find_affected_files(black_box(&changed)))); @@ -292,8 +291,6 @@ fn benchmark_change_detection(c: &mut Criterion) { rt.block_on(async { let path = PathBuf::from("file_50.rs"); let new_content = generate_rust_file(50, "medium"); - let new_fp = AnalysisDefFingerprint::new(new_content.as_bytes()); - let old_fp = storage.load_fingerprint(&path).await.unwrap(); let changed = match old_fp { Some(old) => !old.content_matches(new_content.as_bytes()), @@ -321,7 +318,10 @@ fn benchmark_graph_traversal(c: &mut Criterion) { // BFS traversal with different graph sizes for size in [100, 500, 1000].iter() { let graph = create_linear_chain(*size); - let changed = HashSet::from([PathBuf::from(format!("file_{}.rs", size - 1))]); + let 
changed: thread_utils::RapidSet = + [PathBuf::from(format!("file_{}.rs", size - 1))] + .into_iter() + .collect(); group.bench_with_input(BenchmarkId::new("bfs_linear_chain", size), size, |b, _| { b.iter(|| black_box(graph.find_affected_files(black_box(&changed)))); @@ -330,7 +330,8 @@ fn benchmark_graph_traversal(c: &mut Criterion) { // Affected file calculation (diamond pattern) let diamond = create_diamond_pattern(); - let changed = HashSet::from([PathBuf::from("file_3.rs")]); + let changed: thread_utils::RapidSet = + [PathBuf::from("file_3.rs")].into_iter().collect(); group.bench_function("affected_files_diamond", |b| { b.iter(|| black_box(diamond.find_affected_files(black_box(&changed)))); @@ -350,7 +351,7 @@ fn benchmark_graph_traversal(c: &mut Criterion) { )); } - let changed = HashSet::from([root.clone()]); + let changed: thread_utils::RapidSet = [root.clone()].into_iter().collect(); group.bench_with_input(BenchmarkId::new("wide_fanout", fanout), fanout, |b, _| { b.iter(|| black_box(graph.find_affected_files(black_box(&changed)))); @@ -359,7 +360,8 @@ fn benchmark_graph_traversal(c: &mut Criterion) { // Tree structure traversal let tree = create_tree_structure(4, 3); // depth=4, fanout=3 = 40 nodes - let root_changed = HashSet::from([PathBuf::from("file_0.rs")]); + let root_changed: thread_utils::RapidSet = + [PathBuf::from("file_0.rs")].into_iter().collect(); group.bench_function("tree_traversal_depth4_fanout3", |b| { b.iter(|| black_box(tree.find_affected_files(black_box(&root_changed)))); @@ -381,7 +383,7 @@ fn benchmark_topological_sort(c: &mut Criterion) { // DAG sorting with different sizes for size in [10, 50, 100, 500].iter() { let graph = create_linear_chain(*size); - let all_files: HashSet<_> = (0..*size) + let all_files: thread_utils::RapidSet<_> = (0..*size) .map(|i| PathBuf::from(format!("file_{}.rs", i))) .collect(); @@ -392,7 +394,7 @@ fn benchmark_topological_sort(c: &mut Criterion) { // Diamond pattern sorting let diamond = 
create_diamond_pattern(); - let diamond_files: HashSet<_> = (0..4) + let diamond_files: thread_utils::RapidSet<_> = (0..4) .map(|i| PathBuf::from(format!("file_{}.rs", i))) .collect(); @@ -402,7 +404,7 @@ fn benchmark_topological_sort(c: &mut Criterion) { // Tree structure sorting let tree = create_tree_structure(4, 3); - let tree_files: HashSet<_> = tree.nodes.keys().cloned().collect(); + let tree_files: thread_utils::RapidSet<_> = tree.nodes.keys().cloned().collect(); group.bench_function("tree_structure", |b| { b.iter(|| black_box(tree.topological_sort(black_box(&tree_files)))); @@ -420,7 +422,10 @@ fn benchmark_topological_sort(c: &mut Criterion) { PathBuf::from("a.rs"), DependencyType::Import, )); - let cyclic_files = HashSet::from([PathBuf::from("a.rs"), PathBuf::from("b.rs")]); + let cyclic_files: thread_utils::RapidSet = + [PathBuf::from("a.rs"), PathBuf::from("b.rs")] + .into_iter() + .collect(); group.bench_function("cycle_detection", |b| { b.iter(|| { @@ -461,7 +466,7 @@ fn benchmark_reanalysis(c: &mut Criterion) { }); // Incremental: only analyze affected files - let changed_files: HashSet<_> = (0..changed_count) + let changed_files: thread_utils::RapidSet<_> = (0..changed_count) .map(|i| PathBuf::from(format!("file_{}.rs", i))) .collect(); @@ -485,7 +490,7 @@ fn benchmark_reanalysis(c: &mut Criterion) { ); // Full: analyze all files regardless of changes - let all_files: HashSet<_> = (0..file_count) + let all_files: thread_utils::RapidSet<_> = (0..file_count) .map(|i| PathBuf::from(format!("file_{}.rs", i))) .collect(); @@ -544,7 +549,6 @@ fn benchmark_cache_hit_rate(c: &mut Criterion) { for i in 0..100 { let path = PathBuf::from(format!("file_{}.rs", i)); let content = generate_rust_file(i, "small"); - let new_fp = AnalysisDefFingerprint::new(content.as_bytes()); if let Some(old_fp) = storage.load_fingerprint(&path).await.unwrap() { if old_fp.content_matches(content.as_bytes()) { @@ -630,11 +634,9 @@ fn benchmark_cache_hit_rate(c: &mut Criterion) { 
group.bench_function("identical_content_detection", |b| { b.iter(|| { rt.block_on(async { - let path = PathBuf::from("test.rs"); let content = generate_rust_file(0, "small"); let fp1 = AnalysisDefFingerprint::new(content.as_bytes()); - let fp2 = AnalysisDefFingerprint::new(content.as_bytes()); black_box(fp1.content_matches(content.as_bytes())) }) @@ -729,7 +731,8 @@ fn benchmark_performance_validation(c: &mut Criterion) { // Large graph performance (10000 nodes) let large_graph = create_linear_chain(10000); - let changed = HashSet::from([PathBuf::from("file_9999.rs")]); + let changed: thread_utils::RapidSet = + [PathBuf::from("file_9999.rs")].into_iter().collect(); group.bench_function("large_graph_10000_nodes", |b| { b.iter(|| black_box(large_graph.find_affected_files(black_box(&changed)))); @@ -737,7 +740,8 @@ fn benchmark_performance_validation(c: &mut Criterion) { // Deep chain performance (1000 levels) let deep_chain = create_linear_chain(1000); - let deep_changed = HashSet::from([PathBuf::from("file_999.rs")]); + let deep_changed: thread_utils::RapidSet = + [PathBuf::from("file_999.rs")].into_iter().collect(); group.bench_function("deep_chain_1000_levels", |b| { b.iter(|| black_box(deep_chain.find_affected_files(black_box(&deep_changed)))); diff --git a/crates/flow/benches/load_test.rs b/crates/flow/benches/load_test.rs index 6c323e6..4e432ba 100644 --- a/crates/flow/benches/load_test.rs +++ b/crates/flow/benches/load_test.rs @@ -10,7 +10,8 @@ //! - Incremental updates //! 
- Memory usage under load -use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main}; +use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +use std::hint::black_box; use std::time::Duration; use thread_services::conversion::compute_content_fingerprint; diff --git a/crates/flow/benches/parse_benchmark.rs b/crates/flow/benches/parse_benchmark.rs index c5ef7f4..e8eea91 100644 --- a/crates/flow/benches/parse_benchmark.rs +++ b/crates/flow/benches/parse_benchmark.rs @@ -25,9 +25,10 @@ //! cargo bench -p thread-flow -- recoco # Run ReCoco integration benchmarks //! ``` -use criterion::{Criterion, Throughput, black_box, criterion_group, criterion_main}; +use criterion::{Criterion, Throughput, criterion_group, criterion_main}; use recoco::base::value::{BasicValue, Value}; use recoco::ops::interface::SimpleFunctionExecutor; +use std::hint::black_box; use thread_ast_engine::tree_sitter::LanguageExt; use thread_flow::functions::parse::ThreadParseExecutor; diff --git a/crates/flow/src/bridge.rs b/crates/flow/src/bridge.rs index 62d8f5f..4c93600 100644 --- a/crates/flow/src/bridge.rs +++ b/crates/flow/src/bridge.rs @@ -38,7 +38,7 @@ impl CodeAnalyzer for CocoIn supports_incremental_analysis: true, supported_analysis_depths: vec![], // TODO performance_profile: thread_services::traits::AnalysisPerformanceProfile::Balanced, - capability_flags: std::collections::HashMap::new(), + capability_flags: thread_utils::get_map(), } } diff --git a/crates/flow/src/incremental/analyzer.rs b/crates/flow/src/incremental/analyzer.rs index 7504463..26c797b 100644 --- a/crates/flow/src/incremental/analyzer.rs +++ b/crates/flow/src/incremental/analyzer.rs @@ -45,10 +45,11 @@ use super::dependency_builder::DependencyGraphBuilder; use super::graph::DependencyGraph; use super::storage::{StorageBackend, StorageError}; use super::types::AnalysisDefFingerprint; +use futures::stream::{self, StreamExt}; use metrics::{counter, gauge, 
histogram}; -use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::time::Instant; +use thread_utils::RapidSet; use tracing::{debug, info, instrument, warn}; // ─── Error Types ───────────────────────────────────────────────────────────── @@ -237,28 +238,31 @@ impl IncrementalAnalyzer { return Ok(AnalysisResult::empty()); } + let concurrency = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(4); + + let paths_owned = paths.to_vec(); + let file_data = stream::iter(paths_owned) + .map(|path| async move { + let content = tokio::fs::read(&path).await?; + let fp = AnalysisDefFingerprint::new(&content); + Ok::<(PathBuf, AnalysisDefFingerprint), std::io::Error>((path, fp)) + }) + .buffer_unordered(concurrency) + .collect::>>() + .await; + let mut changed_files = Vec::new(); let mut cache_hits = 0; let mut cache_total = 0; - for path in paths { + for data in file_data { + let (path, current_fp) = data.map_err(AnalyzerError::Io)?; debug!(file_path = ?path, "analyzing file"); - // Check if file exists - if !tokio::fs::try_exists(path).await? 
{ - return Err(AnalyzerError::Io(std::io::Error::new( - std::io::ErrorKind::NotFound, - format!("File not found: {}", path.display()), - ))); - } - - // Read file content - let content = tokio::fs::read(path).await?; - - // Compute current fingerprint - let current_fp = AnalysisDefFingerprint::new(&content); // Load stored fingerprint - let stored_fp = self.storage.load_fingerprint(path).await?; + let stored_fp = self.storage.load_fingerprint(&path).await?; cache_total += 1; @@ -267,23 +271,23 @@ impl IncrementalAnalyzer { // Compare fingerprints if stored.fingerprint().as_slice() != current_fp.fingerprint().as_slice() { // Content changed - save new fingerprint - info!("cache miss - content changed"); + info!(file = ?path, "cache miss - content changed"); counter!("cache_misses_total").increment(1); changed_files.push(path.clone()); - let _ = self.storage.save_fingerprint(path, ¤t_fp).await; + let _ = self.storage.save_fingerprint(&path, ¤t_fp).await; } else { // Cache hit - no change - info!("cache hit"); + info!(file = ?path, "cache hit"); counter!("cache_hits_total").increment(1); cache_hits += 1; } } None => { // New file - no cached fingerprint, save it - info!("cache miss - new file"); + info!(file = ?path, "cache miss - new file"); counter!("cache_misses_total").increment(1); changed_files.push(path.clone()); - let _ = self.storage.save_fingerprint(path, ¤t_fp).await; + let _ = self.storage.save_fingerprint(&path, ¤t_fp).await; } } } @@ -352,8 +356,8 @@ impl IncrementalAnalyzer { return Ok(Vec::new()); } - // Convert to HashSet for efficient lookup - let changed_set: HashSet = changed.iter().cloned().collect(); + // Convert to RapidSet for efficient lookup + let changed_set: RapidSet = changed.iter().cloned().collect(); // Use graph's BFS traversal to find affected files let affected_set = self.dependency_graph.find_affected_files(&changed_set); @@ -391,8 +395,8 @@ impl IncrementalAnalyzer { return Ok(()); } - // Convert to HashSet for topological sort - 
let file_set: HashSet = files.iter().cloned().collect(); + // Convert to RapidSet for topological sort + let file_set: RapidSet = files.iter().cloned().collect(); // Sort files in dependency order (dependencies before dependents) let sorted_files = self diff --git a/crates/flow/src/incremental/backends/d1.rs b/crates/flow/src/incremental/backends/d1.rs index 8176bad..14726b6 100644 --- a/crates/flow/src/incremental/backends/d1.rs +++ b/crates/flow/src/incremental/backends/d1.rs @@ -50,7 +50,6 @@ use crate::incremental::types::{ }; use async_trait::async_trait; use recoco::utils::fingerprint::Fingerprint; -use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -466,10 +465,10 @@ impl StorageBackend for D1IncrementalBackend { ) .await?; - let source_files: HashSet = src_result + let source_files: thread_utils::RapidSet = src_result .results .iter() - .filter_map(|r| r["source_path"].as_str().map(|s| PathBuf::from(s))) + .filter_map(|r| r["source_path"].as_str().map(PathBuf::from)) .collect(); Ok(Some(AnalysisDefFingerprint { @@ -557,8 +556,8 @@ impl StorageBackend for D1IncrementalBackend { .await?; // Build source files map grouped by fingerprint_path. 
- let mut source_map: std::collections::HashMap> = - std::collections::HashMap::new(); + let mut source_map: thread_utils::RapidMap> = + thread_utils::get_map(); for row in &src_result.results { if let (Some(fp_path), Some(src_path)) = ( row["fingerprint_path"].as_str(), diff --git a/crates/flow/src/incremental/backends/postgres.rs b/crates/flow/src/incremental/backends/postgres.rs index 1358d14..a3d8fc7 100644 --- a/crates/flow/src/incremental/backends/postgres.rs +++ b/crates/flow/src/incremental/backends/postgres.rs @@ -36,8 +36,8 @@ use crate::incremental::types::{ use async_trait::async_trait; use deadpool_postgres::{Config, Pool, Runtime}; use recoco::utils::fingerprint::Fingerprint; -use std::collections::HashSet; use std::path::{Path, PathBuf}; +use thread_utils::RapidSet; use tokio_postgres::NoTls; /// PostgreSQL storage backend for the incremental update system. @@ -325,7 +325,7 @@ impl StorageBackend for PostgresIncrementalBackend { .await .map_err(pg_error)?; - let source_files: HashSet = src_rows + let source_files: RapidSet = src_rows .iter() .map(|r| { let s: String = r.get(0); @@ -491,8 +491,8 @@ impl StorageBackend for PostgresIncrementalBackend { let src_rows = client.query(&src_stmt, &[]).await.map_err(pg_error)?; // Build source files map grouped by fingerprint_path - let mut source_map: std::collections::HashMap> = - std::collections::HashMap::new(); + let mut source_map: thread_utils::RapidMap> = + thread_utils::get_map(); for row in &src_rows { let fp_path: String = row.get(0); let src_path: String = row.get(1); diff --git a/crates/flow/src/incremental/extractors/go.rs b/crates/flow/src/incremental/extractors/go.rs index 68e2a28..17521fa 100644 --- a/crates/flow/src/incremental/extractors/go.rs +++ b/crates/flow/src/incremental/extractors/go.rs @@ -179,10 +179,10 @@ impl GoDependencyExtractor { "import_spec_list" => { let mut list_cursor = child.walk(); for spec in child.children(&mut list_cursor) { - if spec.kind() == "import_spec" { - if 
let Some(info) = self.parse_import_spec(spec, source) { - imports.push(info); - } + if spec.kind() == "import_spec" + && let Some(info) = self.parse_import_spec(spec, source) + { + imports.push(info); } } } @@ -254,11 +254,11 @@ impl GoDependencyExtractor { import_path: &str, ) -> Result { // Module-internal import - if let Some(ref module) = self.module_path { - if let Some(relative) = import_path.strip_prefix(module) { - let relative = relative.strip_prefix('/').unwrap_or(relative); - return Ok(PathBuf::from(relative)); - } + if let Some(ref module) = self.module_path + && let Some(relative) = import_path.strip_prefix(module) + { + let relative = relative.strip_prefix('/').unwrap_or(relative); + return Ok(PathBuf::from(relative)); } // Vendor mode for external imports diff --git a/crates/flow/src/incremental/extractors/typescript.rs b/crates/flow/src/incremental/extractors/typescript.rs index 6cff078..1bdda4e 100644 --- a/crates/flow/src/incremental/extractors/typescript.rs +++ b/crates/flow/src/incremental/extractors/typescript.rs @@ -349,10 +349,10 @@ impl TypeScriptDependencyExtractor { ) { let mut cursor = node.walk(); for child in node.children(&mut cursor) { - if child.kind() == "import_specifier" { - if let Some(symbol) = self.extract_import_specifier(child, source) { - symbols.push(symbol); - } + if child.kind() == "import_specifier" + && let Some(symbol) = self.extract_import_specifier(child, source) + { + symbols.push(symbol); } } } @@ -370,13 +370,13 @@ impl TypeScriptDependencyExtractor { let children: Vec<_> = node.children(&mut cursor).collect(); for child in &children { - if child.kind() == "identifier" { - if let Ok(name) = child.utf8_text(source) { - if imported_name.is_none() { - imported_name = Some(name.to_string()); - } else { - local_name = Some(name.to_string()); - } + if child.kind() == "identifier" + && let Ok(name) = child.utf8_text(source) + { + if imported_name.is_none() { + imported_name = Some(name.to_string()); + } else { + 
local_name = Some(name.to_string()); } } } @@ -403,10 +403,10 @@ impl TypeScriptDependencyExtractor { for child in node.children(&mut cursor) { match child.kind() { "identifier" => { - if let Ok(text) = child.utf8_text(source) { - if text == "require" { - is_require = true; - } + if let Ok(text) = child.utf8_text(source) + && text == "require" + { + is_require = true; } } "import" => { @@ -529,13 +529,13 @@ impl TypeScriptDependencyExtractor { let children: Vec<_> = node.children(&mut cursor).collect(); for child in &children { - if child.kind() == "property_identifier" || child.kind() == "identifier" { - if let Ok(name) = child.utf8_text(source) { - if imported_name.is_none() { - imported_name = Some(name.to_string()); - } else { - local_name = Some(name.to_string()); - } + if (child.kind() == "property_identifier" || child.kind() == "identifier") + && let Ok(name) = child.utf8_text(source) + { + if imported_name.is_none() { + imported_name = Some(name.to_string()); + } else { + local_name = Some(name.to_string()); } } } @@ -654,20 +654,20 @@ impl TypeScriptDependencyExtractor { if has_default && exports.is_empty() { let mut cursor = node.walk(); for child in node.children(&mut cursor) { - if child.kind() == "identifier" + if (child.kind() == "identifier" || child.kind() == "number" - || child.kind() == "string" + || child.kind() == "string") + && let Ok(text) = child.utf8_text(source) + && text != "default" + && text != "export" + && text != "*" { - if let Ok(text) = child.utf8_text(source) { - if text != "default" && text != "export" && text != "*" { - exports.push(ExportInfo { - symbol_name: "default".to_string(), - is_default: true, - export_type: ExportType::Default, - }); - break; - } - } + exports.push(ExportInfo { + symbol_name: "default".to_string(), + is_default: true, + export_type: ExportType::Default, + }); + break; } } } @@ -693,14 +693,14 @@ impl TypeScriptDependencyExtractor { ) { let mut cursor = node.walk(); for child in node.children(&mut 
cursor) { - if child.kind() == "variable_declarator" { - if let Some(name) = self.extract_variable_name(child, source) { - exports.push(ExportInfo { - symbol_name: name, - is_default: false, - export_type: ExportType::Named, - }); - } + if child.kind() == "variable_declarator" + && let Some(name) = self.extract_variable_name(child, source) + { + exports.push(ExportInfo { + symbol_name: name, + is_default: false, + export_type: ExportType::Named, + }); } } } @@ -741,18 +741,18 @@ impl TypeScriptDependencyExtractor { ) { let mut cursor = node.walk(); for child in node.children(&mut cursor) { - if child.kind() == "export_specifier" { - if let Some(name) = self.extract_export_specifier_name(child, source) { - exports.push(ExportInfo { - symbol_name: name, - is_default: false, - export_type: if is_reexport { - ExportType::NamedReexport - } else { - ExportType::Named - }, - }); - } + if child.kind() == "export_specifier" + && let Some(name) = self.extract_export_specifier_name(child, source) + { + exports.push(ExportInfo { + symbol_name: name, + is_default: false, + export_type: if is_reexport { + ExportType::NamedReexport + } else { + ExportType::Named + }, + }); } } } diff --git a/crates/flow/src/incremental/graph.rs b/crates/flow/src/incremental/graph.rs index 5d6bd58..3a55c7e 100644 --- a/crates/flow/src/incremental/graph.rs +++ b/crates/flow/src/incremental/graph.rs @@ -18,9 +18,10 @@ use super::types::{AnalysisDefFingerprint, DependencyEdge, DependencyStrength}; use metrics::gauge; -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::VecDeque; use std::fmt; use std::path::{Path, PathBuf}; +use thread_utils::{RapidMap, RapidSet}; /// Errors that can occur during dependency graph operations. 
#[derive(Debug)] @@ -59,7 +60,7 @@ impl std::error::Error for GraphError {} /// use thread_flow::incremental::graph::DependencyGraph; /// use thread_flow::incremental::types::{DependencyEdge, DependencyType}; /// use std::path::PathBuf; -/// use std::collections::HashSet; +/// use thread_utils::RapidSet; /// /// let mut graph = DependencyGraph::new(); /// @@ -83,16 +84,16 @@ impl std::error::Error for GraphError {} #[derive(Debug, Clone)] pub struct DependencyGraph { /// Fingerprint state for each tracked file. - pub nodes: HashMap, + pub nodes: RapidMap, /// All dependency edges in the graph. pub edges: Vec, /// Forward adjacency: file -> files it depends on. - forward_adj: HashMap>, + forward_adj: RapidMap>, /// Reverse adjacency: file -> files that depend on it. - reverse_adj: HashMap>, + reverse_adj: RapidMap>, } impl DependencyGraph { @@ -109,10 +110,10 @@ impl DependencyGraph { /// ``` pub fn new() -> Self { Self { - nodes: HashMap::new(), + nodes: thread_utils::get_map(), edges: Vec::new(), - forward_adj: HashMap::new(), - reverse_adj: HashMap::new(), + forward_adj: thread_utils::get_map(), + reverse_adj: thread_utils::get_map(), } } @@ -246,7 +247,7 @@ impl DependencyGraph { /// use thread_flow::incremental::graph::DependencyGraph; /// use thread_flow::incremental::types::{DependencyEdge, DependencyType}; /// use std::path::PathBuf; - /// use std::collections::HashSet; + /// use thread_utils::RapidSet; /// /// let mut graph = DependencyGraph::new(); /// @@ -259,15 +260,15 @@ impl DependencyGraph { /// )); /// /// // Change C -> affects B and A - /// let changed = HashSet::from([PathBuf::from("C")]); + /// let changed = RapidSet::from([PathBuf::from("C")]); /// let affected = graph.find_affected_files(&changed); /// assert!(affected.contains(&PathBuf::from("A"))); /// assert!(affected.contains(&PathBuf::from("B"))); /// assert!(affected.contains(&PathBuf::from("C"))); /// ``` - pub fn find_affected_files(&self, changed_files: &HashSet) -> HashSet { - let mut 
affected = HashSet::new(); - let mut visited = HashSet::new(); + pub fn find_affected_files(&self, changed_files: &RapidSet) -> RapidSet { + let mut affected = thread_utils::get_set(); + let mut visited = thread_utils::get_set(); let mut queue: VecDeque = changed_files.iter().cloned().collect(); while let Some(file) = queue.pop_front() { @@ -313,7 +314,7 @@ impl DependencyGraph { /// use thread_flow::incremental::graph::DependencyGraph; /// use thread_flow::incremental::types::{DependencyEdge, DependencyType}; /// use std::path::PathBuf; - /// use std::collections::HashSet; + /// use thread_utils::RapidSet; /// /// let mut graph = DependencyGraph::new(); /// // A depends on B, B depends on C @@ -324,7 +325,7 @@ impl DependencyGraph { /// PathBuf::from("B"), PathBuf::from("C"), DependencyType::Import, /// )); /// - /// let files = HashSet::from([ + /// let files = RapidSet::from([ /// PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C"), /// ]); /// let sorted = graph.topological_sort(&files).unwrap(); @@ -335,10 +336,10 @@ impl DependencyGraph { /// assert!(pos_c < pos_b); /// assert!(pos_b < pos_a); /// ``` - pub fn topological_sort(&self, files: &HashSet) -> Result, GraphError> { + pub fn topological_sort(&self, files: &RapidSet) -> Result, GraphError> { let mut sorted = Vec::new(); - let mut visited = HashSet::new(); - let mut temp_mark = HashSet::new(); + let mut visited = thread_utils::get_set(); + let mut temp_mark = thread_utils::get_set(); for file in files { if !visited.contains(file) { @@ -408,9 +409,9 @@ impl DependencyGraph { fn visit_node( &self, file: &Path, - subset: &HashSet, - visited: &mut HashSet, - temp_mark: &mut HashSet, + subset: &RapidSet, + visited: &mut RapidSet, + temp_mark: &mut RapidSet, sorted: &mut Vec, ) -> Result<(), GraphError> { let file_buf = file.to_path_buf(); @@ -551,7 +552,7 @@ mod tests { let deps = graph.get_dependencies(Path::new("main.rs")); assert_eq!(deps.len(), 2); - let dep_targets: HashSet<_> = 
deps.iter().map(|e| &e.to).collect(); + let dep_targets: RapidSet<_> = deps.iter().map(|e| &e.to).collect(); assert!(dep_targets.contains(&PathBuf::from("utils.rs"))); assert!(dep_targets.contains(&PathBuf::from("config.rs"))); } @@ -596,7 +597,7 @@ mod tests { let dependents = graph.get_dependents(Path::new("utils.rs")); assert_eq!(dependents.len(), 2); - let dependent_sources: HashSet<_> = dependents.iter().map(|e| &e.from).collect(); + let dependent_sources: RapidSet<_> = dependents.iter().map(|e| &e.from).collect(); assert!(dependent_sources.contains(&PathBuf::from("main.rs"))); assert!(dependent_sources.contains(&PathBuf::from("lib.rs"))); } @@ -628,7 +629,8 @@ mod tests { DependencyType::Import, )); - let changed = HashSet::from([PathBuf::from("utils.rs")]); + let changed: thread_utils::RapidSet = + [PathBuf::from("utils.rs")].into_iter().collect(); let affected = graph.find_affected_files(&changed); assert!(affected.contains(&PathBuf::from("utils.rs"))); @@ -652,7 +654,7 @@ mod tests { DependencyType::Import, )); - let changed = HashSet::from([PathBuf::from("C")]); + let changed: thread_utils::RapidSet = [PathBuf::from("C")].into_iter().collect(); let affected = graph.find_affected_files(&changed); assert_eq!(affected.len(), 3); @@ -687,7 +689,7 @@ mod tests { DependencyType::Import, )); - let changed = HashSet::from([PathBuf::from("D")]); + let changed: thread_utils::RapidSet = [PathBuf::from("D")].into_iter().collect(); let affected = graph.find_affected_files(&changed); assert_eq!(affected.len(), 4); @@ -714,7 +716,7 @@ mod tests { DependencyType::Import, )); - let changed = HashSet::from([PathBuf::from("B")]); + let changed: thread_utils::RapidSet = [PathBuf::from("B")].into_iter().collect(); let affected = graph.find_affected_files(&changed); assert!(affected.contains(&PathBuf::from("A"))); @@ -739,7 +741,7 @@ mod tests { DependencyType::Export, // Weak )); - let changed = HashSet::from([PathBuf::from("B")]); + let changed: thread_utils::RapidSet = 
[PathBuf::from("B")].into_iter().collect(); let affected = graph.find_affected_files(&changed); assert!(affected.contains(&PathBuf::from("A"))); @@ -760,7 +762,7 @@ mod tests { DependencyType::Import, )); - let changed = HashSet::new(); + let changed = thread_utils::get_set(); let affected = graph.find_affected_files(&changed); assert!(affected.is_empty()); } @@ -768,7 +770,8 @@ mod tests { #[test] fn test_find_affected_files_unknown_file() { let graph = DependencyGraph::new(); - let changed = HashSet::from([PathBuf::from("nonexistent.rs")]); + let changed: thread_utils::RapidSet = + [PathBuf::from("nonexistent.rs")].into_iter().collect(); let affected = graph.find_affected_files(&changed); // The changed file itself is always included @@ -792,7 +795,9 @@ mod tests { DependencyType::Import, )); - let changed = HashSet::from([PathBuf::from("C"), PathBuf::from("D")]); + let changed: thread_utils::RapidSet = [PathBuf::from("C"), PathBuf::from("D")] + .into_iter() + .collect(); let affected = graph.find_affected_files(&changed); assert_eq!(affected.len(), 4); @@ -816,7 +821,10 @@ mod tests { DependencyType::Import, )); - let files = HashSet::from([PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C")]); + let files: thread_utils::RapidSet = + [PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C")] + .into_iter() + .collect(); let sorted = graph.topological_sort(&files).unwrap(); assert_eq!(sorted.len(), 3); @@ -855,12 +863,14 @@ mod tests { DependencyType::Import, )); - let files = HashSet::from([ + let files: thread_utils::RapidSet = [ PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C"), PathBuf::from("D"), - ]); + ] + .into_iter() + .collect(); let sorted = graph.topological_sort(&files).unwrap(); assert_eq!(sorted.len(), 4); @@ -893,12 +903,14 @@ mod tests { DependencyType::Import, )); - let files = HashSet::from([ + let files: thread_utils::RapidSet = [ PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C"), PathBuf::from("D"), - ]); + ] + 
.into_iter() + .collect(); let sorted = graph.topological_sort(&files).unwrap(); assert_eq!(sorted.len(), 4); @@ -916,7 +928,8 @@ mod tests { #[test] fn test_topological_sort_single_node() { let graph = DependencyGraph::new(); - let files = HashSet::from([PathBuf::from("only.rs")]); + let files: thread_utils::RapidSet = + [PathBuf::from("only.rs")].into_iter().collect(); let sorted = graph.topological_sort(&files).unwrap(); assert_eq!(sorted, vec![PathBuf::from("only.rs")]); @@ -925,7 +938,7 @@ mod tests { #[test] fn test_topological_sort_empty_set() { let graph = DependencyGraph::new(); - let files = HashSet::new(); + let files = thread_utils::get_set(); let sorted = graph.topological_sort(&files).unwrap(); assert!(sorted.is_empty()); @@ -953,7 +966,9 @@ mod tests { )); // Sort only A and B - let files = HashSet::from([PathBuf::from("A"), PathBuf::from("B")]); + let files: thread_utils::RapidSet = [PathBuf::from("A"), PathBuf::from("B")] + .into_iter() + .collect(); let sorted = graph.topological_sort(&files).unwrap(); assert_eq!(sorted.len(), 2); @@ -981,7 +996,9 @@ mod tests { DependencyType::Import, )); - let files = HashSet::from([PathBuf::from("A"), PathBuf::from("B")]); + let files: thread_utils::RapidSet = [PathBuf::from("A"), PathBuf::from("B")] + .into_iter() + .collect(); let result = graph.topological_sort(&files); assert!(result.is_err()); @@ -989,7 +1006,7 @@ mod tests { match err { GraphError::CyclicDependency(path) => { assert!( - path == PathBuf::from("A") || path == PathBuf::from("B"), + path == Path::new("A") || path == Path::new("B"), "Cycle should involve A or B, got: {}", path.display() ); @@ -1018,7 +1035,10 @@ mod tests { DependencyType::Import, )); - let files = HashSet::from([PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C")]); + let files: thread_utils::RapidSet = + [PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C")] + .into_iter() + .collect(); let result = graph.topological_sort(&files); assert!(result.is_err()); } @@ 
-1034,7 +1054,7 @@ mod tests { DependencyType::Import, )); - let files = HashSet::from([PathBuf::from("A")]); + let files: thread_utils::RapidSet = [PathBuf::from("A")].into_iter().collect(); let result = graph.topological_sort(&files); assert!(result.is_err()); } diff --git a/crates/flow/src/incremental/invalidation.rs b/crates/flow/src/incremental/invalidation.rs index 222e2c6..4fdc3ca 100644 --- a/crates/flow/src/incremental/invalidation.rs +++ b/crates/flow/src/incremental/invalidation.rs @@ -17,9 +17,9 @@ use super::graph::{DependencyGraph, GraphError}; use metrics::histogram; -use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::time::Instant; +use thread_utils::{RapidMap, RapidSet}; use tracing::{info, warn}; /// Errors that can occur during invalidation detection. @@ -45,6 +45,7 @@ pub enum InvalidationError { /// ```rust /// use thread_flow::incremental::invalidation::InvalidationDetector; /// use thread_flow::incremental::DependencyGraph; +/// use thread_utils::RapishSet; /// use std::path::PathBuf; /// /// let graph = DependencyGraph::new(); @@ -174,7 +175,7 @@ impl InvalidationDetector { ); // Step 1: Find all files transitively affected by changes - let changed_set: HashSet = changed_files.iter().cloned().collect(); + let changed_set: RapidSet = changed_files.iter().cloned().collect(); let affected = self.graph.find_affected_files(&changed_set); let invalidated_files: Vec = affected.iter().cloned().collect(); @@ -258,7 +259,7 @@ impl InvalidationDetector { /// ``` pub fn topological_sort(&self, files: &[PathBuf]) -> Result, InvalidationError> { // Delegate to DependencyGraph's topological sort and map errors - let files_set: HashSet = files.iter().cloned().collect(); + let files_set: RapidSet = files.iter().cloned().collect(); self.graph .topological_sort(&files_set) @@ -297,8 +298,8 @@ impl InvalidationDetector { /// ``` pub fn propagate_invalidation(&self, root: &Path) -> Vec { // Delegate to DependencyGraph's 
find_affected_files for single root - let root_set = HashSet::from([root.to_path_buf()]); - let affected = self.graph.find_affected_files(&root_set); + let root_set: RapidSet = [root.to_path_buf()].into_iter().collect(); + let affected: RapidSet = self.graph.find_affected_files(&root_set); affected.into_iter().collect() } @@ -317,7 +318,7 @@ impl InvalidationDetector { /// /// Vector of strongly connected components, where each component /// is a vector of file paths involved in a cycle. - fn find_strongly_connected_components(&self, files: &HashSet) -> Vec> { + fn find_strongly_connected_components(&self, files: &RapidSet) -> Vec> { // Tarjan's SCC algorithm for finding all cycles let mut state = TarjanState::new(); let mut sccs = Vec::new(); @@ -396,20 +397,20 @@ impl InvalidationDetector { /// State for Tarjan's SCC algorithm struct TarjanState { index_counter: usize, - indices: HashMap, - lowlinks: HashMap, + indices: RapidMap, + lowlinks: RapidMap, stack: Vec, - on_stack: HashSet, + on_stack: RapidSet, } impl TarjanState { fn new() -> Self { Self { index_counter: 0, - indices: HashMap::new(), - lowlinks: HashMap::new(), + indices: thread_utils::get_map(), + lowlinks: thread_utils::get_map(), stack: Vec::new(), - on_stack: HashSet::new(), + on_stack: thread_utils::get_set(), } } } @@ -1068,7 +1069,9 @@ mod tests { )); let detector = InvalidationDetector::new(graph); - let files = HashSet::from([PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C")]); + let files: RapidSet = [PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C")] + .into_iter() + .collect(); let sccs = detector.find_strongly_connected_components(&files); // No non-trivial SCCs (all components have size 1) @@ -1091,7 +1094,9 @@ mod tests { )); let detector = InvalidationDetector::new(graph); - let files = HashSet::from([PathBuf::from("A"), PathBuf::from("B")]); + let files: RapidSet = [PathBuf::from("A"), PathBuf::from("B")] + .into_iter() + .collect(); let sccs = 
detector.find_strongly_connected_components(&files); assert_eq!(sccs.len(), 1); @@ -1111,7 +1116,7 @@ mod tests { )); let detector = InvalidationDetector::new(graph); - let files = HashSet::from([PathBuf::from("A")]); + let files: RapidSet = [PathBuf::from("A")].into_iter().collect(); let sccs = detector.find_strongly_connected_components(&files); // Self-loop creates a non-trivial SCC of size 1 @@ -1146,12 +1151,14 @@ mod tests { )); let detector = InvalidationDetector::new(graph); - let files = HashSet::from([ + let files: RapidSet = [ PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C"), PathBuf::from("D"), - ]); + ] + .into_iter() + .collect(); let sccs = detector.find_strongly_connected_components(&files); assert_eq!(sccs.len(), 2); @@ -1183,12 +1190,14 @@ mod tests { )); let detector = InvalidationDetector::new(graph); - let files = HashSet::from([ + let files: RapidSet = [ PathBuf::from("A"), PathBuf::from("B"), PathBuf::from("C"), PathBuf::from("D"), - ]); + ] + .into_iter() + .collect(); let sccs = detector.find_strongly_connected_components(&files); // Should find one SCC containing B and C diff --git a/crates/flow/src/incremental/storage.rs b/crates/flow/src/incremental/storage.rs index 7f577f2..6f8cf51 100644 --- a/crates/flow/src/incremental/storage.rs +++ b/crates/flow/src/incremental/storage.rs @@ -168,7 +168,7 @@ pub trait StorageBackend: Send + Sync + std::fmt::Debug { /// ``` #[derive(Debug)] pub struct InMemoryStorage { - fingerprints: tokio::sync::RwLock>, + fingerprints: tokio::sync::RwLock>, edges: tokio::sync::RwLock>, } @@ -176,7 +176,7 @@ impl InMemoryStorage { /// Creates a new empty in-memory storage backend. 
pub fn new() -> Self { Self { - fingerprints: tokio::sync::RwLock::new(std::collections::HashMap::new()), + fingerprints: tokio::sync::RwLock::new(thread_utils::get_map()), edges: tokio::sync::RwLock::new(Vec::new()), } } diff --git a/crates/flow/src/incremental/types.rs b/crates/flow/src/incremental/types.rs index 26c2014..fc1b462 100644 --- a/crates/flow/src/incremental/types.rs +++ b/crates/flow/src/incremental/types.rs @@ -9,8 +9,8 @@ use recoco::utils::fingerprint::{Fingerprint, Fingerprinter}; use serde::{Deserialize, Serialize}; -use std::collections::HashSet; use std::path::{Path, PathBuf}; +use thread_utils::RapidSet; /// Tracks the fingerprint and source files for an analysis result. /// @@ -32,7 +32,7 @@ use std::path::{Path, PathBuf}; pub struct AnalysisDefFingerprint { /// Source files that contribute to this analysis result. /// Used to determine invalidation scope when dependencies change. - pub source_files: HashSet, + pub source_files: RapidSet, /// Content fingerprint of the analyzed file (Blake3, 16 bytes). /// Combines file content hash for change detection. 
@@ -197,7 +197,7 @@ impl AnalysisDefFingerprint { let mut fingerprinter = Fingerprinter::default(); fingerprinter.write_raw_bytes(content); Self { - source_files: HashSet::new(), + source_files: thread_utils::get_set(), fingerprint: fingerprinter.into_fingerprint(), last_analyzed: None, } @@ -217,14 +217,14 @@ impl AnalysisDefFingerprint { /// /// ```rust /// use thread_flow::incremental::types::AnalysisDefFingerprint; - /// use std::collections::HashSet; + /// use thread_utils::RapidSet; /// use std::path::PathBuf; /// - /// let sources = HashSet::from([PathBuf::from("dep.rs")]); + /// let sources = RapidSet::from([PathBuf::from("dep.rs")]); /// let fp = AnalysisDefFingerprint::with_sources(b"content", sources); /// assert_eq!(fp.source_files.len(), 1); /// ``` - pub fn with_sources(content: &[u8], source_files: HashSet) -> Self { + pub fn with_sources(content: &[u8], source_files: RapidSet) -> Self { let mut fingerprinter = Fingerprinter::default(); fingerprinter.write_raw_bytes(content); Self { @@ -496,10 +496,12 @@ mod tests { #[test] fn test_fingerprint_with_sources() { - let sources = HashSet::from([ + let sources: RapidSet = [ PathBuf::from("src/utils.rs"), PathBuf::from("src/config.rs"), - ]); + ] + .into_iter() + .collect(); let fp = AnalysisDefFingerprint::with_sources(b"content", sources.clone()); assert_eq!(fp.source_files, sources); assert!(fp.content_matches(b"content")); @@ -521,7 +523,7 @@ mod tests { #[test] fn test_fingerprint_update_preserves_source_files() { - let sources = HashSet::from([PathBuf::from("dep.rs")]); + let sources: RapidSet = [PathBuf::from("dep.rs")].into_iter().collect(); let fp = AnalysisDefFingerprint::with_sources(b"old", sources.clone()); let updated = fp.update_fingerprint(b"new"); assert_eq!(updated.source_files, sources); @@ -558,7 +560,9 @@ mod tests { fn test_fingerprint_remove_source_file() { let mut fp = AnalysisDefFingerprint::with_sources( b"content", - HashSet::from([PathBuf::from("a.rs"), PathBuf::from("b.rs")]), 
+ [PathBuf::from("a.rs"), PathBuf::from("b.rs")] + .into_iter() + .collect::>(), ); assert!(fp.remove_source_file(Path::new("a.rs"))); @@ -574,8 +578,8 @@ mod tests { let mut fp = AnalysisDefFingerprint::new(b"content"); assert!(fp.last_analyzed.is_none()); - fp.set_last_analyzed(1706400000_000_000); // Some timestamp - assert_eq!(fp.last_analyzed, Some(1706400000_000_000)); + fp.set_last_analyzed(1_706_400_000_000_000); // Some timestamp + assert_eq!(fp.last_analyzed, Some(1_706_400_000_000_000)); } #[test] diff --git a/crates/flow/src/monitoring/logging.rs b/crates/flow/src/monitoring/logging.rs index acccf76..f2fa615 100644 --- a/crates/flow/src/monitoring/logging.rs +++ b/crates/flow/src/monitoring/logging.rs @@ -299,18 +299,18 @@ macro_rules! timed_operation { /// Structured logging helpers pub mod structured { - use std::collections::HashMap; + use thread_utils::RapidMap; /// Build a structured log context pub struct LogContext { - fields: HashMap, + fields: RapidMap, } impl LogContext { /// Create a new log context pub fn new() -> Self { Self { - fields: HashMap::new(), + fields: thread_utils::get_map(), } } diff --git a/crates/flow/src/monitoring/mod.rs b/crates/flow/src/monitoring/mod.rs index 199db27..dbd8e85 100644 --- a/crates/flow/src/monitoring/mod.rs +++ b/crates/flow/src/monitoring/mod.rs @@ -62,10 +62,10 @@ pub mod logging; pub mod performance; -use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, RwLock}; use std::time::{Duration, Instant}; +use thread_utils::RapidMap; /// Metrics collector for Thread Flow operations #[derive(Clone)] @@ -89,7 +89,7 @@ struct MetricsInner { start_time: Instant, // Error tracking - errors_by_type: RwLock>, + errors_by_type: RwLock>, } impl Metrics { @@ -105,7 +105,7 @@ impl Metrics { files_processed: AtomicU64::new(0), symbols_extracted: AtomicU64::new(0), start_time: Instant::now(), - errors_by_type: RwLock::new(HashMap::new()), + errors_by_type: 
RwLock::new(thread_utils::get_map()), }), } } @@ -372,7 +372,7 @@ pub struct MetricsSnapshot { pub throughput_files_per_sec: f64, // Error metrics - pub errors_by_type: HashMap, + pub errors_by_type: RapidMap, pub error_rate: f64, // System metrics diff --git a/crates/flow/src/monitoring/performance.rs b/crates/flow/src/monitoring/performance.rs index 79d27d3..848a4b1 100644 --- a/crates/flow/src/monitoring/performance.rs +++ b/crates/flow/src/monitoring/performance.rs @@ -124,7 +124,7 @@ impl PerformanceMetrics { let total = self.fingerprint_total.load(Ordering::Relaxed); let duration_ns = self.fingerprint_duration_ns.load(Ordering::Relaxed); - let avg_ns = if total > 0 { duration_ns / total } else { 0 }; + let avg_ns = duration_ns.checked_div(total).unwrap_or(0); FingerprintStats { total_count: total, @@ -159,7 +159,7 @@ impl PerformanceMetrics { let duration_ns = self.query_duration_ns.load(Ordering::Relaxed); let errors = self.query_errors.load(Ordering::Relaxed); - let avg_ns = if count > 0 { duration_ns / count } else { 0 }; + let avg_ns = duration_ns.checked_div(count).unwrap_or(0); let error_rate = if count > 0 { (errors as f64 / count as f64) * 100.0 } else { diff --git a/crates/flow/src/targets/d1.rs b/crates/flow/src/targets/d1.rs index 76dd510..21698d9 100644 --- a/crates/flow/src/targets/d1.rs +++ b/crates/flow/src/targets/d1.rs @@ -21,7 +21,6 @@ use recoco::ops::sdk::{ use recoco::setup::{ChangeDescription, CombinedState, ResourceSetupChange, SetupChangeType}; use recoco::utils::prelude::Error as RecocoError; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; use std::fmt::Debug; use std::hash::Hash; use std::sync::Arc; @@ -509,7 +508,9 @@ impl D1SetupState { for field in key_fields { key_columns.push(ColumnSchema { name: field.name.clone(), + // spellchecker:off sql_type: value_type_to_sql(&field.value_type.typ), + // spellchecker:on nullable: field.value_type.nullable, primary_key: true, }); @@ -518,7 +519,9 @@ impl D1SetupState { 
for field in value_fields { value_columns.push(ColumnSchema { name: field.name.clone(), + // spellchecker:off sql_type: value_type_to_sql(&field.value_type.typ), + // spellchecker:on nullable: field.value_type.nullable, primary_key: false, }); @@ -747,10 +750,10 @@ impl TargetFactoryBase for D1TargetFactory { &self, mutations: Vec>, ) -> Result<(), RecocoError> { - let mut mutations_by_db: HashMap< + let mut mutations_by_db: thread_utils::RapidMap< String, Vec<&ExportTargetMutationWithContext<'_, Self::ExportContext>>, - > = HashMap::new(); + > = thread_utils::get_map(); for mutation in &mutations { mutations_by_db diff --git a/crates/flow/tests/concurrency_tests.rs b/crates/flow/tests/concurrency_tests.rs index de64612..6d9bbad 100644 --- a/crates/flow/tests/concurrency_tests.rs +++ b/crates/flow/tests/concurrency_tests.rs @@ -638,11 +638,19 @@ mod performance_tests { let items: Vec = (0..500).collect(); + // Heavier work to ensure parallelism overhead is negligible relative to computation + let heavy_work = |_n: u32| -> Result<(), ExecutionError> { + let _result: u64 = (0..500_000) + .map(|i| (i as u64).wrapping_mul(i as u64)) + .sum(); + Ok(()) + }; + // Single thread let single = Executor::rayon(Some(1)).unwrap(); let start = Instant::now(); single - .execute_batch(items.clone(), cpu_intensive_work) + .execute_batch(items.clone(), heavy_work) .await .unwrap(); let single_time = start.elapsed(); @@ -650,10 +658,7 @@ mod performance_tests { // Four threads let multi = Executor::rayon(Some(4)).unwrap(); let start = Instant::now(); - multi - .execute_batch(items, cpu_intensive_work) - .await - .unwrap(); + multi.execute_batch(items, heavy_work).await.unwrap(); let multi_time = start.elapsed(); let speedup = single_time.as_secs_f64() / multi_time.as_secs_f64(); diff --git a/crates/flow/tests/extractor_go_tests.rs b/crates/flow/tests/extractor_go_tests.rs index 06661c1..877cad0 100644 --- a/crates/flow/tests/extractor_go_tests.rs +++ 
b/crates/flow/tests/extractor_go_tests.rs @@ -403,7 +403,7 @@ fn test_resolve_standard_library_import() { let result = extractor.resolve_import_path(Path::new("main.go"), "fmt"); // Standard library imports cannot be resolved to local paths - assert!(result.is_err() || result.unwrap() == PathBuf::from("GOROOT/src/fmt")); + assert!(result.is_err() || result.unwrap() == Path::new("GOROOT/src/fmt")); } #[test] diff --git a/crates/flow/tests/incremental_engine_tests.rs b/crates/flow/tests/incremental_engine_tests.rs index 31b6829..b0f263a 100644 --- a/crates/flow/tests/incremental_engine_tests.rs +++ b/crates/flow/tests/incremental_engine_tests.rs @@ -76,7 +76,7 @@ struct AnalysisResult { files_skipped: usize, /// Number of dependency edges created. - edges_created: usize, + _edges_created: usize, /// Duration of the analysis operation. duration: Duration, @@ -190,7 +190,7 @@ impl IncrementalTestFixture { // STUB: Replace with actual IncrementalAnalyzer implementation // For now, simulate analysis by storing fingerprints let mut files_analyzed = 0; - let mut edges_created = 0; + let edges_created = 0; for (path, content) in &self.files_created { let fp = AnalysisDefFingerprint::new(content.as_bytes()); @@ -207,7 +207,7 @@ impl IncrementalTestFixture { let result = AnalysisResult { files_analyzed, files_skipped: 0, - edges_created, + _edges_created: edges_created, duration: start.elapsed(), invalidated_files: Vec::new(), reanalysis_order: Vec::new(), @@ -267,7 +267,7 @@ impl IncrementalTestFixture { let result = AnalysisResult { files_analyzed, files_skipped, - edges_created: 0, + _edges_created: 0, duration: start.elapsed(), invalidated_files, reanalysis_order: Vec::new(), @@ -288,34 +288,6 @@ impl IncrementalTestFixture { .is_some() } - /// Checks if a dependency edge exists from `from_path` to `to_path`. 
- async fn verify_edges_exist(&self, from_path: &str, to_path: &str) -> bool { - let from_full = self.temp_dir.path().join(from_path); - let to_full = self.temp_dir.path().join(to_path); - - if let Ok(edges) = self.storage.load_edges_from(&from_full).await { - edges.iter().any(|e| e.to == to_full) - } else { - false - } - } - - /// Gets the list of invalidated files from the last analysis. - fn get_invalidated_files(&self) -> Vec { - self.last_analysis_result - .as_ref() - .map(|r| r.invalidated_files.clone()) - .unwrap_or_default() - } - - /// Gets the reanalysis order from the last analysis. - fn get_reanalysis_order(&self) -> Vec { - self.last_analysis_result - .as_ref() - .map(|r| r.reanalysis_order.clone()) - .unwrap_or_default() - } - /// Returns the path to the test directory. fn test_dir(&self) -> &Path { self.temp_dir.path() @@ -358,25 +330,6 @@ fn create_test_graph(edges: &[(&str, &str)]) -> DependencyGraph { graph } -/// Asserts that the reanalysis order matches the expected order. -fn assert_reanalysis_order(actual: &[PathBuf], expected: &[&str]) { - assert_eq!( - actual.len(), - expected.len(), - "Reanalysis order length mismatch" - ); - - for (i, (actual_path, expected_name)) in actual.iter().zip(expected.iter()).enumerate() { - assert!( - actual_path.ends_with(expected_name), - "Reanalysis order mismatch at position {}: expected {}, got {}", - i, - expected_name, - actual_path.display() - ); - } -} - // ============================================================================= // 1. 
End-to-End Incremental Workflow Tests (7 tests) // ============================================================================= @@ -892,7 +845,7 @@ async fn test_multiple_simultaneous_changes() { #[tokio::test] async fn test_circular_dependency_handled() { - let mut fixture = IncrementalTestFixture::new().await; + let _fixture = IncrementalTestFixture::new().await; // Create cycle: A β†’ B β†’ A (simulated via edges) // Note: Rust prevents actual circular imports, but graph can have cycles @@ -1034,7 +987,7 @@ async fn test_reanalysis_respects_dependencies() { .await; let result = fixture.run_incremental_update().await.unwrap(); - let order = result.reanalysis_order; + let _order = result.reanalysis_order; // STUB: Will verify B analyzed before C (dependency order) // For now, just verify reanalysis occurred @@ -1070,7 +1023,7 @@ async fn test_independent_files_analyzed_parallel() { let start = Instant::now(); let result = fixture.run_incremental_update().await.unwrap(); - let duration = start.elapsed(); + let _duration = start.elapsed(); // STUB: Will verify parallel execution (duration << sequential) // For now, verify all files reanalyzed @@ -1474,62 +1427,6 @@ async fn test_backend_error_handling() { assert!(result.is_ok()); } -#[tokio::test] -async fn test_transactional_consistency() { - // STUB: Verify batch updates with partial failure maintain consistency - - let mut fixture = IncrementalTestFixture::new().await; - - fixture - .create_file("src/trans1.rs", &create_test_rust_file("trans1", &[])) - .await; - fixture - .create_file("src/trans2.rs", &create_test_rust_file("trans2", &[])) - .await; - fixture.run_initial_analysis().await.unwrap(); - - // STUB: Modify files and inject failure midway - // STUB: Verify rollback or consistent state -} - -#[tokio::test] -async fn test_storage_migration_compatibility() { - // STUB: Verify old schema β†’ new schema data preservation - - let fixture = IncrementalTestFixture::new().await; - - // STUB: Load old schema 
data - // STUB: Migrate to new schema - // STUB: Verify data integrity preserved - - // For now, just verify current schema works - let graph = DependencyGraph::new(); - fixture.storage.save_full_graph(&graph).await.unwrap(); - let loaded = fixture.storage.load_full_graph().await.unwrap(); - assert_eq!(loaded.node_count(), 0); -} - -// ============================================================================= -// 8. Error Handling Tests (7 tests) -// ============================================================================= - -#[tokio::test] -async fn test_storage_error_during_save() { - // STUB: Trigger storage error during save operation - - let mut fixture = IncrementalTestFixture::new().await; - - fixture - .create_file("src/err.rs", &create_test_rust_file("err", &[])) - .await; - - // STUB: Inject storage error - // STUB: Verify error propagated and state unchanged - - let result = fixture.run_initial_analysis().await; - assert!(result.is_ok()); -} - #[tokio::test] async fn test_graph_cycle_detection() { // Verify cycle detection returns clear error message diff --git a/crates/flow/tests/incremental_integration_tests.rs b/crates/flow/tests/incremental_integration_tests.rs index 1a4d666..2470683 100644 --- a/crates/flow/tests/incremental_integration_tests.rs +++ b/crates/flow/tests/incremental_integration_tests.rs @@ -8,7 +8,6 @@ //! Tests backend factory pattern, feature gating, and end-to-end //! storage operations across all three backend implementations. -use std::collections::HashSet; use std::path::{Path, PathBuf}; use thread_flow::incremental::DependencyGraph; use thread_flow::incremental::backends::{BackendConfig, BackendType, create_backend}; @@ -338,7 +337,7 @@ async fn test_e2e_full_graph_persistence() { ); // 5. 
Verify affected files computation works after load - let changed = HashSet::from([PathBuf::from("c.rs")]); + let changed: thread_utils::RapidSet = [PathBuf::from("c.rs")].into_iter().collect(); let affected = loaded_graph.find_affected_files(&changed); assert!( @@ -415,7 +414,8 @@ async fn test_e2e_incremental_invalidation() { ); // Find affected files - let changed = HashSet::from([PathBuf::from("config.rs")]); + let changed: thread_utils::RapidSet = + [PathBuf::from("config.rs")].into_iter().collect(); let affected = graph.find_affected_files(&changed); assert!( diff --git a/crates/flow/tests/incremental_postgres_tests.rs b/crates/flow/tests/incremental_postgres_tests.rs index 0341276..42f0e5e 100644 --- a/crates/flow/tests/incremental_postgres_tests.rs +++ b/crates/flow/tests/incremental_postgres_tests.rs @@ -15,7 +15,6 @@ #![cfg(feature = "postgres-backend")] -use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::time::Instant; @@ -127,10 +126,12 @@ async fn test_upsert_fingerprint() { async fn test_fingerprint_with_source_files() { let (backend, _container) = setup_backend().await; - let sources = HashSet::from([ + let sources: thread_utils::RapidSet = [ PathBuf::from("src/utils.rs"), PathBuf::from("src/config.rs"), - ]); + ] + .into_iter() + .collect(); let fp = AnalysisDefFingerprint::with_sources(b"content", sources.clone()); backend @@ -206,7 +207,7 @@ async fn test_delete_nonexistent_fingerprint() { async fn test_delete_fingerprint_cascades_source_files() { let (backend, _container) = setup_backend().await; - let sources = HashSet::from([PathBuf::from("dep.rs")]); + let sources: thread_utils::RapidSet = [PathBuf::from("dep.rs")].into_iter().collect(); let fp = AnalysisDefFingerprint::with_sources(b"content", sources); backend @@ -223,7 +224,9 @@ async fn test_delete_fingerprint_cascades_source_files() { // Re-inserting should work without duplicate key errors let fp2 = AnalysisDefFingerprint::with_sources( b"new content", - 
HashSet::from([PathBuf::from("other.rs")]), + [PathBuf::from("other.rs")] + .into_iter() + .collect::>(), ); backend .save_fingerprint(Path::new("main.rs"), &fp2) @@ -413,7 +416,10 @@ async fn test_full_graph_with_fingerprints_and_sources() { let (backend, _container) = setup_backend().await; // Save fingerprints with source files - let sources_a = HashSet::from([PathBuf::from("dep1.rs"), PathBuf::from("dep2.rs")]); + let sources_a: thread_utils::RapidSet = + [PathBuf::from("dep1.rs"), PathBuf::from("dep2.rs")] + .into_iter() + .collect(); let mut fp_a = AnalysisDefFingerprint::with_sources(b"content a", sources_a); fp_a.set_last_analyzed(1000); diff --git a/crates/flow/tests/integration_e2e_tests.rs b/crates/flow/tests/integration_e2e_tests.rs index 3ac77e1..07a799e 100644 --- a/crates/flow/tests/integration_e2e_tests.rs +++ b/crates/flow/tests/integration_e2e_tests.rs @@ -17,7 +17,6 @@ //! 5. **Storage Backend Validation** (6 tests): InMemory persistence, state transitions //! 6. **Error Handling & Edge Cases** (6 tests): Parse failures, large files, concurrent mods -use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; use thread_flow::incremental::analyzer::IncrementalAnalyzer; @@ -159,31 +158,6 @@ impl IntegrationFixture { graph.edge_count() ); } - - /// Validates that the given files exist in the dependency graph. - async fn assert_files_in_graph(&self, files: &[&str]) { - let graph = self.builder.graph(); - for file in files { - let path = self.temp_path().join(file); - assert!( - graph.contains_node(&path), - "File {} should exist in graph", - file - ); - } - } - - /// Validates that a dependency edge exists between two files. 
- async fn assert_edge_exists(&self, from: &str, to: &str) { - let graph = self.builder.graph(); - let from_path = self.temp_path().join(from); - let to_path = self.temp_path().join(to); - - let deps = graph.get_dependencies(&from_path); - let has_edge = deps.iter().any(|edge| edge.to == to_path); - - assert!(has_edge, "Expected edge from {} to {} not found", from, to); - } } // ═══════════════════════════════════════════════════════════════════════════ @@ -681,7 +655,7 @@ async fn test_e2e_circular_detection() { fixture.analyzer.graph_mut().add_edge(edge_b_to_a); // Topological sort should fail on cycle - let files = HashSet::from([a.clone(), b.clone()]); + let files: thread_utils::RapidSet = [a.clone(), b.clone()].into_iter().collect(); let result = fixture.analyzer.graph().topological_sort(&files); assert!(result.is_err(), "Should detect circular dependency"); } diff --git a/crates/flow/tests/invalidation_tests.rs b/crates/flow/tests/invalidation_tests.rs index 3c50cfc..bcec61a 100644 --- a/crates/flow/tests/invalidation_tests.rs +++ b/crates/flow/tests/invalidation_tests.rs @@ -19,15 +19,15 @@ use thread_flow::incremental::types::{DependencyEdge, DependencyType}; #[test] fn test_invalidation_detector_new() { - let graph = DependencyGraph::new(); + let _graph = DependencyGraph::new(); // let detector = InvalidationDetector::new(graph); // assert!(detector is valid); } #[test] fn test_invalidation_detector_with_populated_graph() { - let mut graph = DependencyGraph::new(); - graph.add_edge(DependencyEdge::new( + let mut _graph = DependencyGraph::new(); + _graph.add_edge(DependencyEdge::new( PathBuf::from("A"), PathBuf::from("B"), DependencyType::Import, @@ -128,7 +128,7 @@ fn test_propagate_respects_strong_dependencies_only() { #[test] fn test_propagate_unknown_file() { - let graph = DependencyGraph::new(); + let _graph = DependencyGraph::new(); // let detector = InvalidationDetector::new(graph); // let affected = 
detector.propagate_invalidation(&PathBuf::from("unknown.rs")); // Should return just the unknown file itself @@ -237,7 +237,7 @@ fn test_topological_sort_cycle_error() { #[test] fn test_topological_sort_empty_set() { - let graph = DependencyGraph::new(); + let _graph = DependencyGraph::new(); // let detector = InvalidationDetector::new(graph); // let sorted = detector.topological_sort(&[]).unwrap(); // assert!(sorted.is_empty()); @@ -354,7 +354,7 @@ fn test_compute_invalidation_multiple_cycles() { #[test] fn test_compute_invalidation_empty_changes() { - let graph = DependencyGraph::new(); + let _graph = DependencyGraph::new(); // let detector = InvalidationDetector::new(graph); // let result = detector.compute_invalidation_set(&[]); diff --git a/crates/flow/tests/observability_metrics_tests.rs b/crates/flow/tests/observability_metrics_tests.rs index 77a6b84..26743ec 100644 --- a/crates/flow/tests/observability_metrics_tests.rs +++ b/crates/flow/tests/observability_metrics_tests.rs @@ -32,7 +32,10 @@ async fn test_metrics_during_analysis() { let mut analyzer = IncrementalAnalyzer::new(storage); // Perform analysis (metrics should be recorded) - let result = analyzer.analyze_changes(&[file1.clone()]).await.unwrap(); + let result = analyzer + .analyze_changes(std::slice::from_ref(&file1)) + .await + .unwrap(); // Verify basic functionality (metrics are recorded internally) assert_eq!(result.changed_files.len(), 1); @@ -48,11 +51,17 @@ async fn test_cache_hit_metrics() { let mut analyzer = IncrementalAnalyzer::new(storage); // First analysis - cache miss - let result1 = analyzer.analyze_changes(&[file1.clone()]).await.unwrap(); + let result1 = analyzer + .analyze_changes(std::slice::from_ref(&file1)) + .await + .unwrap(); assert_eq!(result1.cache_hit_rate, 0.0); // Second analysis - cache hit - let result2 = analyzer.analyze_changes(&[file1.clone()]).await.unwrap(); + let result2 = analyzer + .analyze_changes(std::slice::from_ref(&file1)) + .await + .unwrap(); 
assert_eq!(result2.cache_hit_rate, 1.0); } @@ -106,7 +115,7 @@ async fn test_invalidation_metrics() { // Trigger invalidation (invalidation metrics should be recorded) let affected = analyzer - .invalidate_dependents(&[file2.clone()]) + .invalidate_dependents(std::slice::from_ref(&file2)) .await .unwrap(); diff --git a/crates/flow/tests/test_data/large.rs b/crates/flow/tests/test_data/large.rs index 679fb70..ae4b2a1 100644 --- a/crates/flow/tests/test_data/large.rs +++ b/crates/flow/tests/test_data/large.rs @@ -35,16 +35,36 @@ impl LargeStruct { } } -pub fn function1() -> i32 { 1 } -pub fn function2() -> i32 { 2 } -pub fn function3() -> i32 { 3 } -pub fn function4() -> i32 { 4 } -pub fn function5() -> i32 { 5 } -pub fn function6() -> i32 { 6 } -pub fn function7() -> i32 { 7 } -pub fn function8() -> i32 { 8 } -pub fn function9() -> i32 { 9 } -pub fn function10() -> i32 { 10 } +pub fn function1() -> i32 { + 1 +} +pub fn function2() -> i32 { + 2 +} +pub fn function3() -> i32 { + 3 +} +pub fn function4() -> i32 { + 4 +} +pub fn function5() -> i32 { + 5 +} +pub fn function6() -> i32 { + 6 +} +pub fn function7() -> i32 { + 7 +} +pub fn function8() -> i32 { + 8 +} +pub fn function9() -> i32 { + 9 +} +pub fn function10() -> i32 { + 10 +} pub fn caller() { function1(); diff --git a/crates/flow/tests/test_data/sample.rs b/crates/flow/tests/test_data/sample.rs index 9d3ae4c..c4769c4 100644 --- a/crates/flow/tests/test_data/sample.rs +++ b/crates/flow/tests/test_data/sample.rs @@ -5,9 +5,9 @@ //! 
Sample Rust code for testing ThreadParse functionality +use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::path::PathBuf; -use serde::{Deserialize, Serialize}; /// A sample struct representing a user #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/flow/tests/test_data/syntax_error.rs b/crates/flow/tests/test_data/syntax_error.rs index 5e6da88..b14318d 100644 --- a/crates/flow/tests/test_data/syntax_error.rs +++ b/crates/flow/tests/test_data/syntax_error.rs @@ -7,7 +7,6 @@ fn broken_function( { let x = 42 return x } - struct BrokenStruct missing_field: String } diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index c15f174..1b3d2d9 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -37,27 +37,30 @@ tree-sitter-bash = { version = "0.25.0", optional = true } tree-sitter-c = { version = "0.24.1", optional = true } tree-sitter-c-sharp = { version = "0.23.1", optional = true } tree-sitter-cpp = { version = "0.23.4", optional = true } -tree-sitter-css = { version = "0.23.2", optional = true } +tree-sitter-css = { version = "0.25.0", optional = true } tree-sitter-elixir = { version = "0.3.4", optional = true } -tree-sitter-go = { version = "0.23.4", optional = true } +tree-sitter-go = { version = "0.25.0", optional = true } tree-sitter-haskell = { version = "0.23.1", optional = true } +tree-sitter-hcl = { version = "1.1.0", optional = true } tree-sitter-html = { version = "0.23.2", optional = true } tree-sitter-java = { version = "0.23.5", optional = true } -tree-sitter-javascript = { version = "0.23.1", optional = true } +tree-sitter-javascript = { version = "0.25.0", optional = true } tree-sitter-json = { version = "0.24.8", optional = true } tree-sitter-kotlin = { package = "tree-sitter-kotlin-sg", version = "0.4.0", optional = true } -tree-sitter-lua = { version = "0.2.0", optional = true } -tree-sitter-php = { version = "0.23.11", optional = true } -tree-sitter-python = { 
version = "0.23.6", optional = true } +tree-sitter-lua = { version = "0.4.1", optional = true } +tree-sitter-nix = { version = "0.3.0", optional = true } +tree-sitter-php = { version = "0.24.2", optional = true } +tree-sitter-python = { version = "0.25.0", optional = true } tree-sitter-ruby = { version = "0.23.1", optional = true } tree-sitter-rust = { version = "0.24.0", optional = true } tree-sitter-scala = { version = "0.24.0", optional = true } +tree-sitter-solidity = { version = "1.2.13", optional = true } tree-sitter-swift = { version = "0.7.1", optional = true } tree-sitter-typescript = { version = "0.23.2", optional = true } tree-sitter-yaml = { version = "0.7.1", optional = true } [dev-dependencies] -criterion = { version = "0.6", features = ["html_reports"] } +criterion = { version = "0.8.2", features = ["html_reports"] } thread-ast-engine = { workspace = true, features = ["matching", "parsing"] } [build-dependencies] @@ -76,6 +79,7 @@ all-parsers = [ "elixir", "go", "haskell", + "hcl", "html", "html-embedded", "java", @@ -83,11 +87,13 @@ all-parsers = [ "json", "kotlin", "lua", + "nix", "php", "python", "ruby", "rust", "scala", + "solidity", "swift", "tree-sitter-parsing", "tsx", @@ -104,6 +110,7 @@ css-napi = ["napi-environment", "tree-sitter-css"] elixir = ["tree-sitter-elixir", "tree-sitter-parsing"] go = ["tree-sitter-go", "tree-sitter-parsing"] haskell = ["tree-sitter-haskell", "tree-sitter-parsing"] +hcl = ["tree-sitter-hcl", "tree-sitter-parsing"] html = ["tree-sitter-html", "tree-sitter-parsing"] html-embedded = [ "css", @@ -138,6 +145,7 @@ napi-compatible = [ #! Tree-sitter cannot build for NAPI-WASM #! 
(that's Node API -- wasm for nodejs environments) napi-environment = [] +nix = ["tree-sitter-nix", "tree-sitter-parsing"] no-enabled-langs = [] php = ["tree-sitter-parsing", "tree-sitter-php"] profiling = [] @@ -145,6 +153,7 @@ python = ["tree-sitter-parsing", "tree-sitter-python"] ruby = ["tree-sitter-parsing", "tree-sitter-ruby"] rust = ["tree-sitter-parsing", "tree-sitter-rust"] scala = ["tree-sitter-parsing", "tree-sitter-scala"] +solidity = ["tree-sitter-parsing", "tree-sitter-solidity"] swift = ["tree-sitter-parsing", "tree-sitter-swift"] tree-sitter-parsing = ["thread-ast-engine/parsing"] tsx = ["tree-sitter-parsing", "tree-sitter-typescript"] diff --git a/crates/language/benches/performance.rs b/crates/language/benches/performance.rs index b5373b4..60e7da1 100644 --- a/crates/language/benches/performance.rs +++ b/crates/language/benches/performance.rs @@ -8,6 +8,8 @@ use criterion::{Criterion, criterion_group, criterion_main}; use std::hint::black_box; use std::path::Path; use std::str::FromStr; +use thread_ast_engine::language::Language; +use thread_ast_engine::tree_sitter::LanguageExt; use thread_language::*; fn bench_pre_process_pattern(c: &mut Criterion) { diff --git a/crates/language/src/bash.rs b/crates/language/src/bash.rs index dbef12f..3cc19b4 100644 --- a/crates/language/src/bash.rs +++ b/crates/language/src/bash.rs @@ -7,7 +7,6 @@ // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT use super::*; - fn test_match(query: &str, source: &str) { use crate::test::test_match_lang; test_match_lang(query, source, Bash); diff --git a/crates/language/src/constants.rs b/crates/language/src/constants.rs index 51e1fc1..ad4a10a 100644 --- a/crates/language/src/constants.rs +++ b/crates/language/src/constants.rs @@ -5,7 +5,7 @@ use crate::SupportLang; -pub const ALL_SUPPORTED_LANGS: [&str; 23] = [ +pub const ALL_SUPPORTED_LANGS: [&str; 26] = [ "bash", "c", "cpp", @@ -14,17 +14,20 @@ pub const ALL_SUPPORTED_LANGS: [&str; 23] = [ "elixir", "go", "haskell", + 
"hcl", "html", "java", "javascript", "json", "kotlin", "lua", + "nix", "php", "python", "rust", "ruby", "scala", + "solidity", "swift", "typescript", "tsx", @@ -32,13 +35,12 @@ pub const ALL_SUPPORTED_LANGS: [&str; 23] = [ ]; #[cfg(any(feature = "bash", feature = "all-parsers"))] -pub const BASH_EXTS: [&str; 19] = [ +pub const BASH_EXTS: [&str; 18] = [ "bash", "bats", "sh", ".bashrc", "bash_aliases", - "bats", "cgi", "command", "env", @@ -66,8 +68,8 @@ cfg_if::cfg_if! { /// C++ specific extensions; we consider cuda c++ for our purposes #[cfg(any(feature = "cpp", feature = "all-parsers"))] -pub const CPP_EXTS: [&str; 11] = [ - "cpp", "cc", "cxx", "hxx", "c++", "hh", "cxx", "cu", "ino", "h", "cu", +pub const CPP_EXTS: [&str; 10] = [ + "cpp", "cc", "cxx", "hpp", "hxx", "c++", "hh", "cu", "ino", "h", ]; #[cfg(any(feature = "csharp", feature = "all-parsers"))] @@ -79,7 +81,7 @@ pub const CSHARP_EXTS: [&str; 2] = ["cs", "csx"]; feature = "css-napi", feature = "napi-compatible" ))] -pub const CSS_EXTS: [&str; 1] = ["css"]; +pub const CSS_EXTS: [&str; 2] = ["css", "scss"]; #[cfg(any(feature = "elixir", feature = "all-parsers"))] pub const ELIXIR_EXTS: [&str; 2] = ["ex", "exs"]; @@ -87,9 +89,12 @@ pub const ELIXIR_EXTS: [&str; 2] = ["ex", "exs"]; #[cfg(any(feature = "go", feature = "all-parsers"))] pub const GO_EXTS: [&str; 1] = ["go"]; -#[cfg(feature = "haskell")] +#[cfg(any(feature = "haskell", feature = "all-parsers"))] pub const HASKELL_EXTS: [&str; 2] = ["hs", "lhs"]; +#[cfg(any(feature = "hcl", feature = "all-parsers"))] +pub const HCL_EXTS: [&str; 6] = ["hcl", "nomad", "tf", "tfvars", "tfstate", "workflow"]; + #[cfg(any( feature = "html", feature = "all-parsers", @@ -118,6 +123,9 @@ pub const KOTLIN_EXTS: [&str; 3] = ["kt", "kts", "ktm"]; #[cfg(any(feature = "lua", feature = "all-parsers"))] pub const LUA_EXTS: [&str; 1] = ["lua"]; +#[cfg(any(feature = "nix", feature = "all-parsers"))] +pub const NIX_EXTS: [&str; 1] = ["nix"]; + #[cfg(any(feature = "php", feature = 
"all-parsers"))] pub const PHP_EXTS: [&str; 2] = ["php", "phtml"]; @@ -133,6 +141,9 @@ pub const RUST_EXTS: [&str; 1] = ["rs"]; #[cfg(any(feature = "scala", feature = "all-parsers"))] pub const SCALA_EXTS: [&str; 4] = ["scala", "sc", "scm", "sbt"]; +#[cfg(any(feature = "solidity", feature = "all-parsers"))] +pub const SOLIDITY_EXTS: [&str; 1] = ["sol"]; + #[cfg(any(feature = "swift", feature = "all-parsers"))] pub const SWIFT_EXTS: [&str; 2] = ["swift", "xctest"]; @@ -161,16 +172,16 @@ cfg_if::cfg_if!( any( feature = "bash", feature = "c", feature = "cpp", feature = "csharp", feature = "css", feature = "elixir", - feature = "go", feature = "haskell", feature = "html", + feature = "go", feature = "haskell", feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", - feature = "kotlin", feature = "lua", feature = "php", + feature = "kotlin", feature = "lua", feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", - feature = "scala", feature = "swift", feature = "tsx", + feature = "scala", feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", feature = "yaml" ) ) )] { - pub const ENABLED_LANGS: &'static [&'static crate::SupportLang; 1] = &[&crate::SupportLang::NoEnabledLangs]; + pub const ENABLED_LANGS: &[&crate::SupportLang; 1] = &[&crate::SupportLang::NoEnabledLangs]; } else { pub const ENABLED_LANGS: &[&SupportLang] = &{ // Count total enabled languages @@ -193,7 +204,9 @@ cfg_if::cfg_if!( { count += 1; } #[cfg(any(feature = "go", feature = "all-parsers"))] { count += 1; } - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] + { count += 1; } + #[cfg(any(feature = "hcl", feature = "all-parsers"))] { count += 1; } #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] { count += 1; } @@ -207,6 +220,8 @@ cfg_if::cfg_if!( { count += 1; } #[cfg(any(feature = "lua", feature = 
"all-parsers"))] { count += 1; } + #[cfg(any(feature = "nix", feature = "all-parsers"))] + { count += 1; } #[cfg(any(feature = "php", feature = "all-parsers"))] { count += 1; } #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -217,12 +232,16 @@ cfg_if::cfg_if!( { count += 1; } #[cfg(any(feature = "scala", feature = "all-parsers"))] { count += 1; } + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + { count += 1; } #[cfg(any(feature = "swift", feature = "all-parsers"))] { count += 1; } #[cfg(any(feature = "typescript", feature = "all-parsers", feature = "typescript-napi", feature = "napi-compatible"))] { count += 1; } #[cfg(any(feature = "tsx", feature = "all-parsers", feature = "tsx-napi", feature = "napi-compatible"))] - { count += 1; } + { + count += 1; + } #[cfg(any(feature = "yaml", feature = "all-parsers"))] { count += 1; } @@ -265,10 +284,14 @@ cfg_if::cfg_if!( result[index] = &Go; index += 1; } - #[cfg(feature = "haskell")] { + #[cfg(any(feature = "haskell", feature = "all-parsers"))] { result[index] = &Haskell; index += 1; } + #[cfg(any(feature = "hcl", feature = "all-parsers"))] { + result[index] = &Hcl; + index += 1; + } #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] { result[index] = &Html; index += 1; @@ -293,6 +316,10 @@ cfg_if::cfg_if!( result[index] = &Lua; index += 1; } + #[cfg(any(feature = "nix", feature = "all-parsers"))] { + result[index] = &Nix; + index += 1; + } #[cfg(any(feature = "php", feature = "all-parsers"))] { result[index] = &Php; index += 1; @@ -313,6 +340,10 @@ cfg_if::cfg_if!( result[index] = &Scala; index += 1; } + #[cfg(any(feature = "solidity", feature = "all-parsers"))] { + result[index] = &Solidity; + index += 1; + } #[cfg(any(feature = "swift", feature = "all-parsers"))] { result[index] = &Swift; index += 1; @@ -321,7 +352,8 @@ cfg_if::cfg_if!( result[index] = &TypeScript; index += 1; } - #[cfg(any(feature = "tsx", feature = "all-parsers", 
feature = "tsx-napi", feature = "napi-compatible"))] { + #[cfg(any(feature = "tsx", feature = "all-parsers", feature = "tsx-napi", feature = "napi-compatible"))] + { result[index] = &Tsx; index += 1; } @@ -342,16 +374,16 @@ cfg_if::cfg_if!( any(feature = "all-parsers", feature = "napi-compatible", feature = "css-napi", feature = "html-napi", feature = "javascript-napi", feature = "typescript-napi", feature = "tsx-napi", feature = "bash", feature = "c", feature = "cpp", feature = "csharp", feature = "css", feature = "elixir", - feature = "go", feature = "haskell", feature = "html", + feature = "go", feature = "haskell", feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", - feature = "kotlin", feature = "lua", feature = "php", + feature = "kotlin", feature = "lua", feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", - feature = "scala", feature = "swift", feature = "tsx", + feature = "scala", feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", feature = "yaml" ) ) )] { - pub const EXTENSIONS: &'static [&'static str; 0] = &[]; + pub const EXTENSIONS: &[&'static &str; 0] = &[]; } else { pub const EXTENSIONS: &[&str] = &{ // Count total extensions needed @@ -381,9 +413,12 @@ cfg_if::cfg_if!( #[cfg(any(feature = "go", feature = "all-parsers"))] { count += GO_EXTS.len(); } - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] { count += HASKELL_EXTS.len(); } + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + { count += HCL_EXTS.len(); } + #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] { count += HTML_EXTS.len(); } @@ -402,6 +437,9 @@ cfg_if::cfg_if!( #[cfg(any(feature = "lua", feature = "all-parsers"))] { count += LUA_EXTS.len(); } + #[cfg(any(feature = "nix", feature = "all-parsers"))] + { count += NIX_EXTS.len(); } + #[cfg(any(feature = "php", feature = 
"all-parsers"))] { count += PHP_EXTS.len(); } @@ -417,6 +455,9 @@ cfg_if::cfg_if!( #[cfg(any(feature = "scala", feature = "all-parsers"))] { count += SCALA_EXTS.len(); } + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + { count += SOLIDITY_EXTS.len(); } + #[cfg(any(feature = "swift", feature = "all-parsers"))] { count += SWIFT_EXTS.len(); } @@ -516,7 +557,7 @@ cfg_if::cfg_if!( } } - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] { let mut i = 0; while i < HASKELL_EXTS.len() { @@ -526,6 +567,16 @@ cfg_if::cfg_if!( } } + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + { + let mut i = 0; + while i < HCL_EXTS.len() { + result[index] = HCL_EXTS[i]; + index += 1; + i += 1; + } + } + #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] { let mut i = 0; @@ -586,6 +637,16 @@ cfg_if::cfg_if!( } } + #[cfg(any(feature = "nix", feature = "all-parsers"))] + { + let mut i = 0; + while i < NIX_EXTS.len() { + result[index] = NIX_EXTS[i]; + index += 1; + i += 1; + } + } + #[cfg(any(feature = "php", feature = "all-parsers"))] { let mut i = 0; @@ -636,6 +697,16 @@ cfg_if::cfg_if!( } } + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + { + let mut i = 0; + while i < SOLIDITY_EXTS.len() { + result[index] = SOLIDITY_EXTS[i]; + index += 1; + i += 1; + } + } + #[cfg(any(feature = "swift", feature = "all-parsers"))] { let mut i = 0; @@ -697,11 +768,11 @@ cfg_if::cfg_if!( any( feature = "bash", feature = "c", feature = "cpp", feature = "csharp", feature = "css", feature = "elixir", - feature = "go", feature = "haskell", feature = "html", + feature = "go", feature = "haskell", feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", - feature = "kotlin", feature = "lua", feature = "php", + feature = "kotlin", feature = "lua", feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", - feature = 
"scala", feature = "swift", feature = "tsx", + feature = "scala", feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", feature = "yaml", feature = "napi-compatible", feature = "css-napi", feature = "html-napi", feature = "javascript-napi", feature = "tsx-napi", feature = "typescript-napi" ) ) @@ -736,9 +807,12 @@ cfg_if::cfg_if!( #[cfg(any(feature = "go", feature = "all-parsers"))] { count += GO_EXTS.len(); } - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] { count += HASKELL_EXTS.len(); } + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + { count += HCL_EXTS.len(); } + #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] { count += HTML_EXTS.len(); } @@ -757,6 +831,9 @@ cfg_if::cfg_if!( #[cfg(any(feature = "lua", feature = "all-parsers"))] { count += LUA_EXTS.len(); } + #[cfg(any(feature = "nix", feature = "all-parsers"))] + { count += NIX_EXTS.len(); } + #[cfg(any(feature = "php", feature = "all-parsers"))] { count += PHP_EXTS.len(); } @@ -772,6 +849,9 @@ cfg_if::cfg_if!( #[cfg(any(feature = "scala", feature = "all-parsers"))] { count += SCALA_EXTS.len(); } + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + { count += SOLIDITY_EXTS.len(); } + #[cfg(any(feature = "swift", feature = "all-parsers"))] { count += SWIFT_EXTS.len(); } @@ -862,7 +942,7 @@ cfg_if::cfg_if!( } } - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] { let mut i = 0; while i < HASKELL_EXTS.len() { @@ -872,6 +952,16 @@ cfg_if::cfg_if!( } } + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + { + let mut i = 0; + while i < HCL_EXTS.len() { + result[index] = SupportLang::Hcl; + index += 1; + i += 1; + } + } + #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] { let mut i = 0; @@ -932,6 +1022,16 @@ cfg_if::cfg_if!( } } + #[cfg(any(feature = "nix", feature = 
"all-parsers"))] + { + let mut i = 0; + while i < NIX_EXTS.len() { + result[index] = SupportLang::Nix; + index += 1; + i += 1; + } + } + #[cfg(any(feature = "php", feature = "all-parsers"))] { let mut i = 0; @@ -982,6 +1082,16 @@ cfg_if::cfg_if!( } } + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + { + let mut i = 0; + while i < SOLIDITY_EXTS.len() { + result[index] = SupportLang::Solidity; + index += 1; + i += 1; + } + } + #[cfg(any(feature = "swift", feature = "all-parsers"))] { let mut i = 0; @@ -1031,7 +1141,7 @@ cfg_if::cfg_if!( } ); -// ========== Consts for Planned Features ========== +// ========== Constants for Planned Features ========== // these aren't yet implemented /// List of files that DO NOT have an extension but are still associated with a language. diff --git a/crates/language/src/cpp.rs b/crates/language/src/cpp.rs index 169bcc4..69b11a5 100644 --- a/crates/language/src/cpp.rs +++ b/crates/language/src/cpp.rs @@ -5,7 +5,6 @@ // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/language/src/elixir.rs b/crates/language/src/elixir.rs index 9a4d705..9aba301 100644 --- a/crates/language/src/elixir.rs +++ b/crates/language/src/elixir.rs @@ -1,10 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. 
// SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#![cfg(test)] use super::*; diff --git a/crates/language/src/ext_iden.rs b/crates/language/src/ext_iden.rs index dd3d377..30d556b 100644 --- a/crates/language/src/ext_iden.rs +++ b/crates/language/src/ext_iden.rs @@ -75,6 +75,7 @@ mod tests { // Test complex extensions assert_eq!(match_by_aho_corasick("tsx"), Some(SupportLang::Tsx)); assert_eq!(match_by_aho_corasick("cpp"), Some(SupportLang::Cpp)); + assert_eq!(match_by_aho_corasick("workflow"), Some(SupportLang::Hcl)); // Test ambiguous extensions (C vs C++) // "c" extension should match C (first in enum order) diff --git a/crates/language/src/go.rs b/crates/language/src/go.rs index 9f50a46..4d4f88f 100644 --- a/crates/language/src/go.rs +++ b/crates/language/src/go.rs @@ -1,11 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - +#![cfg(test)] use super::*; fn test_match(query: &str, source: &str) { @@ -52,15 +50,15 @@ func intSeq() { i++ }() }"#, - r#"defer func() { -$$$BODY }()"#, - r#"func b() { $$$BODY }"#, + r#"defer func() { $$$BODY }()"#, + r#"func b() { $$$BODY}"#, ); assert_eq!( ret, r#" func intSeq() { - func b() { i++ } + func b() { i++ + } }"# ); } diff --git a/crates/language/src/haskell.rs b/crates/language/src/haskell.rs index b28e041..40f960c 100644 --- a/crates/language/src/haskell.rs +++ b/crates/language/src/haskell.rs @@ -1,10 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. 
// SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#![cfg(test)] use super::*; diff --git a/crates/language/src/hcl.rs b/crates/language/src/hcl.rs new file mode 100644 index 0000000..f133dcc --- /dev/null +++ b/crates/language/src/hcl.rs @@ -0,0 +1,66 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#![cfg(test)] +use super::*; +use crate::test::{test_match_lang, test_replace_lang}; + +fn test_match(s1: &str, s2: &str) { + test_match_lang(s1, s2, Hcl) +} + +#[test] +fn test_hcl_pattern() { + test_match("$A = $B", r#"foo = "bar""#); + test_match( + "resource $TYPE $NAME $BODY", + r#"resource "aws_instance" "example" { ami = "ami-123" }"#, + ); + test_match( + "$BLOCK $BODY", + r#"terraform { required_providers { aws = { source = "hashicorp/aws" } } }"#, + ); + test_match( + "variable $NAME $CONFIG", + r#"variable "region" { default = "us-west-2" }"#, + ); + test_match( + "output $NAME $VALUE", + r#"output "instance_ip" { value = aws_instance.example.public_ip }"#, + ); + test_match("$VAR = [$$$ITEMS]", r#"tags = ["production", "web"]"#); + test_match( + "$VAR = { $$$PAIRS }", + r#"labels = { environment = "prod", team = "backend" }"#, + ); + test_match(r#"$VAR = "$CONTENT""#, r#"name = "instance""#); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + test_replace_lang(src, pattern, replacer, Hcl) +} + +#[test] +fn test_hcl_replace() { + let ret = test_replace(r#"foo = "bar""#, r#"$A = $B"#, r#"$B = $A"#); + assert_eq!(ret, r#""bar" = foo"#); + + let ret = test_replace( + r#"resource "aws_instance" "example" { ami = "ami-123" }"#, + r#"resource $TYPE $NAME $BODY"#, + r#"resource $NAME $TYPE $BODY"#, + ); + assert_eq!( + ret, + r#"resource "example" "aws_instance" { ami = "ami-123" }"# + 
); + + let ret = test_replace( + r#"variable "region" { default = "us-west-2" }"#, + r#"variable "region" { default = $DEFAULT }"#, + r#"variable "region" { default = "eu-west-1" }"#, + ); + assert_eq!(ret, r#"variable "region" { default = "eu-west-1" }"#); +} diff --git a/crates/language/src/kotlin.rs b/crates/language/src/kotlin.rs index e49d357..f42fab0 100644 --- a/crates/language/src/kotlin.rs +++ b/crates/language/src/kotlin.rs @@ -1,11 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - +#![cfg(test)] use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/language/src/lib.rs b/crates/language/src/lib.rs index a8bf0a2..08dbb31 100644 --- a/crates/language/src/lib.rs +++ b/crates/language/src/lib.rs @@ -19,7 +19,7 @@ //! ### Custom Pattern Languages //! Languages requiring special metavariable handling with custom expando characters: //! - [`C`] (`Β΅`), [`Cpp`] (`Β΅`), [`CSharp`] (`Β΅`), [`Css`] (`_`), [`Elixir`] (`Β΅`) -//! - [`Go`] (`Β΅`), [`Haskell`] (`Β΅`), [`Html`] (`z`), [`Kotlin`] (`Β΅`), [`Php`] (`Β΅`) +//! - [`Go`] (`Β΅`), [`Haskell`] (`Β΅`), [`Hcl`] (`Β΅`), [`Html`] (`z`), [`Kotlin`] (`Β΅`), [`Nix`](`_`), [`Php`] (`Β΅`) //! - [`Python`] (`Β΅`), [`Ruby`] (`Β΅`), [`Rust`] (`Β΅`), [`Swift`] (`Β΅`) //! //! 
## Usage @@ -57,17 +57,20 @@ pub mod ext_iden; feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -92,8 +95,10 @@ mod css; mod elixir; #[cfg(any(feature = "go", feature = "all-parsers"))] mod go; -#[cfg(feature = "haskell")] +#[cfg(any(feature = "haskell", feature = "all-parsers"))] mod haskell; +#[cfg(any(feature = "hcl", feature = "all-parsers"))] +mod hcl; #[cfg(any( feature = "html", feature = "all-parsers", @@ -101,12 +106,21 @@ mod haskell; feature = "napi-compatible" ))] mod html; +#[cfg(any(feature = "java", feature = "all-parsers"))] +#[cfg(any( + feature = "javascript", + feature = "all-parsers", + feature = "javascript-napi", + feature = "napi-compatible" +))] #[cfg(any(feature = "json", feature = "all-parsers"))] mod json; #[cfg(any(feature = "kotlin", feature = "all-parsers"))] mod kotlin; #[cfg(any(feature = "lua", feature = "all-parsers"))] mod lua; +#[cfg(any(feature = "nix", feature = "all-parsers"))] +mod nix; #[cfg(any(feature = "php", feature = "all-parsers"))] mod php; #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -117,6 +131,8 @@ mod ruby; mod rust; #[cfg(any(feature = "scala", feature = "all-parsers"))] mod scala; +#[cfg(any(feature = "solidity", feature = "all-parsers"))] +mod solidity; #[cfg(any(feature = "swift", feature = "all-parsers"))] mod swift; #[cfg(any(feature = "yaml", feature = "all-parsers"))] @@ -134,56 +150,29 @@ use thread_ast_engine::{Pattern, PatternBuilder, PatternError}; #[cfg(feature = "profiling")] pub mod profiling; +#[allow(unused_imports)] use ignore::types::{Types, TypesBuilder}; +#[allow(unused_imports)] use serde::de::Visitor; +#[allow(unused_imports)] use 
serde::{Deserialize, Deserializer, Serialize, de}; +#[allow(unused_imports)] use std::borrow::Cow; -use std::fmt; -use std::fmt::{Display, Formatter}; +use std::fmt::{self, Display, Formatter}; use std::path::Path; use std::str::FromStr; -#[cfg(feature = "matching")] + +#[allow(unused_imports)] use thread_ast_engine::Node; -use thread_ast_engine::meta_var::MetaVariable; -#[cfg(feature = "matching")] -use thread_ast_engine::tree_sitter::{StrDoc, TSRange}; -#[cfg(any( - feature = "all-parsers", - feature = "napi-compatible", - feature = "css-napi", - feature = "html-napi", - feature = "javascript-napi", - feature = "typescript-napi", - feature = "tsx-napi", - feature = "bash", - feature = "c", - feature = "cpp", - feature = "csharp", - feature = "css", - feature = "elixir", - feature = "go", - feature = "haskell", - feature = "html", - feature = "java", - feature = "javascript", - feature = "json", - feature = "kotlin", - feature = "lua", - feature = "php", - feature = "python", - feature = "ruby", - feature = "rust", - feature = "scala", - feature = "swift", - feature = "tsx", - feature = "typescript", - feature = "yaml" -))] -pub use thread_ast_engine::{ - language::Language, - tree_sitter::{LanguageExt, TSLanguage}, -}; +#[allow(unused_imports)] +use thread_ast_engine::language::Language; +#[allow(unused_imports)] #[cfg(feature = "matching")] +use thread_ast_engine::meta_var::MetaVariable; +#[allow(unused_imports)] +#[cfg(feature = "tree-sitter-parsing")] +use thread_ast_engine::tree_sitter::{LanguageExt, StrDoc, TSLanguage, TSRange}; +#[allow(unused_imports)] use thread_utils::RapidMap; /// Implements standard [`Language`] and [`LanguageExt`] traits for languages that accept `$` in identifiers. @@ -209,6 +198,7 @@ use thread_utils::RapidMap; feature = "javascript-napi", feature = "json", feature = "lua", + feature = "solidity", feature = "scala", feature = "tsx", feature = "tsx-napi", @@ -268,6 +258,7 @@ macro_rules! 
impl_lang { /// let result = pre_process_pattern('Β΅', "def hello(): pass"); /// assert_eq!(result, "def hello(): pass"); /// ``` +#[allow(dead_code)] fn pre_process_pattern(expando: char, query: &str) -> std::borrow::Cow<'_, str> { // Fast path: check if any processing is needed let has_dollar = query.as_bytes().contains(&b'$'); @@ -368,9 +359,11 @@ fn pre_process_pattern(expando: char, query: &str) -> std::borrow::Cow<'_, str> feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "html-napi", feature = "kotlin", + feature = "nix", feature = "php", feature = "python", feature = "ruby", @@ -432,17 +425,20 @@ pub trait Alias: Display { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -501,17 +497,20 @@ macro_rules! impl_alias { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -572,13 +571,21 @@ impl_lang_expando!(Go, language_go, 'Β΅'); // GHC supports Unicode syntax per // https://ghc.gitlab.haskell.org/ghc/doc/users_guide/exts/unicode_syntax.html // and the tree-sitter-haskell grammar parses it too. 
-#[cfg(feature = "haskell")] +#[cfg(any(feature = "haskell", feature = "all-parsers"))] impl_lang_expando!(Haskell, language_haskell, 'Β΅'); +// https://developer.hashicorp.com/terraform/language/syntax/configuration#identifiers +#[cfg(any(feature = "hcl", feature = "all-parsers"))] +impl_lang_expando!(Hcl, language_hcl, 'Β΅'); + // https://github.com/fwcd/tree-sitter-kotlin/pull/93 #[cfg(any(feature = "kotlin", feature = "all-parsers"))] impl_lang_expando!(Kotlin, language_kotlin, 'Β΅'); +// Nix uses $ for string interpolation (e.g., "${pkgs.hello}") +#[cfg(any(feature = "nix", feature = "all-parsers"))] +impl_lang_expando!(Nix, language_nix, '_'); + // PHP accepts unicode to be used as some name not var name though #[cfg(any(feature = "php", feature = "all-parsers"))] impl_lang_expando!(Php, language_php, 'Β΅'); @@ -619,6 +626,8 @@ impl_lang!(JavaScript, language_javascript); impl_lang!(Json, language_json); #[cfg(any(feature = "lua", feature = "all-parsers"))] impl_lang!(Lua, language_lua); +#[cfg(any(feature = "solidity", feature = "all-parsers"))] +impl_lang!(Solidity, language_solidity); #[cfg(any(feature = "scala", feature = "all-parsers"))] impl_lang!(Scala, language_scala); #[cfg(any( @@ -686,12 +695,14 @@ pub enum SupportLang { feature = "napi-compatible" ))] Css, - #[cfg(any(feature = "go", feature = "all-parsers"))] - Go, #[cfg(any(feature = "elixir", feature = "all-parsers"))] Elixir, - #[cfg(feature = "haskell")] + #[cfg(any(feature = "go", feature = "all-parsers"))] + Go, + #[cfg(any(feature = "haskell", feature = "all-parsers"))] Haskell, + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + Hcl, #[cfg(any( feature = "html", feature = "all-parsers", @@ -714,6 +725,8 @@ pub enum SupportLang { Kotlin, #[cfg(any(feature = "lua", feature = "all-parsers"))] Lua, + #[cfg(any(feature = "nix", feature = "all-parsers"))] + Nix, #[cfg(any(feature = "php", feature = "all-parsers"))] Php, #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -724,6 
+737,8 @@ pub enum SupportLang { Rust, #[cfg(any(feature = "scala", feature = "all-parsers"))] Scala, + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + Solidity, #[cfg(any(feature = "swift", feature = "all-parsers"))] Swift, #[cfg(any( @@ -758,17 +773,20 @@ pub enum SupportLang { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", + feature = "nix", feature = "lua", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -800,8 +818,10 @@ impl SupportLang { Elixir, #[cfg(any(feature = "go", feature = "all-parsers"))] Go, - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] Haskell, + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + Hcl, #[cfg(any( feature = "html", feature = "all-parsers", @@ -824,6 +844,8 @@ impl SupportLang { Kotlin, #[cfg(any(feature = "lua", feature = "all-parsers"))] Lua, + #[cfg(any(feature = "nix", feature = "all-parsers"))] + Nix, #[cfg(any(feature = "php", feature = "all-parsers"))] Php, #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -834,6 +856,8 @@ impl SupportLang { Rust, #[cfg(any(feature = "scala", feature = "all-parsers"))] Scala, + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + Solidity, #[cfg(any(feature = "swift", feature = "all-parsers"))] Swift, #[cfg(any( @@ -868,17 +892,20 @@ impl SupportLang { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -936,17 +963,20 @@ impl 
std::error::Error for SupportLangErr {} feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -960,7 +990,7 @@ impl<'de> Deserialize<'de> for SupportLang { deserializer.deserialize_str(SupportLangVisitor) } } - +#[allow(dead_code)] struct SupportLangVisitor; #[cfg(any( @@ -979,17 +1009,20 @@ struct SupportLangVisitor; feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1010,6 +1043,7 @@ impl Visitor<'_> for SupportLangVisitor { } } +#[allow(dead_code)] struct AliasVisitor { aliases: &'static [&'static str], } @@ -1029,17 +1063,20 @@ struct AliasVisitor { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1080,17 +1117,20 @@ impl Visitor<'_> for AliasVisitor { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = 
"solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1107,17 +1147,20 @@ impl_aliases! { Elixir, "elixir" => &["ex", "elixir"], Go, "go" => &["go", "golang"], Haskell, "haskell" => &["hs", "haskell"], + Hcl, "hcl" => &["hcl", "terraform"], Html, "html" => &["html"], Java, "java" => &["java"], JavaScript, "javascript" => &["javascript", "js", "jsx"], Json, "json" => &["json"], Kotlin, "kotlin" => &["kotlin", "kt"], Lua, "lua" => &["lua"], + Nix, "nix" => &["nix"], Php, "php" => &["php"], Python, "python" => &["py", "python"], Ruby, "ruby" => &["rb", "ruby"], Rust, "rust" => &["rs", "rust"], Scala, "scala" => &["scala"], + Solidity, "solidity" => &["sol", "solidity"], Swift, "swift" => &["swift"], TypeScript, "typescript" => &["ts", "typescript"], Tsx, "tsx" => &["tsx"], @@ -1151,8 +1194,10 @@ impl FromStr for SupportLang { "elixir" | "ex" => Ok(SupportLang::Elixir), #[cfg(any(feature = "go", feature = "all-parsers"))] "go" | "golang" => Ok(SupportLang::Go), - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] "haskell" | "hs" => Ok(SupportLang::Haskell), + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + "hcl" | "terraform" => Ok(SupportLang::Hcl), #[cfg(any( feature = "html", feature = "all-parsers", @@ -1175,6 +1220,8 @@ impl FromStr for SupportLang { "kotlin" | "kt" => Ok(SupportLang::Kotlin), #[cfg(any(feature = "lua", feature = "all-parsers"))] "lua" => Ok(SupportLang::Lua), + #[cfg(any(feature = "nix", feature = "all-parsers"))] + "nix" => Ok(SupportLang::Nix), #[cfg(any(feature = "php", feature = "all-parsers"))] "php" => Ok(SupportLang::Php), #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -1185,6 +1232,8 @@ impl FromStr for SupportLang { "rust" | "rs" => Ok(SupportLang::Rust), #[cfg(any(feature = "scala", feature = "all-parsers"))] "scala" => Ok(SupportLang::Scala), + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + "solidity" | "sol" => Ok(SupportLang::Solidity), 
#[cfg(any(feature = "swift", feature = "all-parsers"))] "swift" => Ok(SupportLang::Swift), #[cfg(any( @@ -1219,17 +1268,20 @@ impl FromStr for SupportLang { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1269,17 +1321,20 @@ impl FromStr for SupportLang { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1303,11 +1358,13 @@ macro_rules! execute_lang_method { S::Elixir => Elixir.$method($($pname,)*), #[cfg(any(feature = "go", feature = "all-parsers"))] S::Go => Go.$method($($pname,)*), - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] S::Haskell => Haskell.$method($($pname,)*), + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + S::Hcl => Hcl.$method($($pname,)*), #[cfg(any(feature = "html", feature = "all-parsers", feature = "html-napi", feature = "napi-compatible"))] S::Html => Html.$method($($pname,)*), - #[cfg(any(feature = "json", feature = "all-parsers"))] + #[cfg(any(feature = "java", feature = "all-parsers"))] S::Java => Java.$method($($pname,)*), #[cfg(any(feature = "javascript", feature = "all-parsers", feature = "javascript-napi", feature = "napi-compatible"))] S::JavaScript => JavaScript.$method($($pname,)*), @@ -1317,6 +1374,8 @@ macro_rules! 
execute_lang_method { S::Kotlin => Kotlin.$method($($pname,)*), #[cfg(any(feature = "lua", feature = "all-parsers"))] S::Lua => Lua.$method($($pname,)*), + #[cfg(any(feature = "nix", feature = "all-parsers"))] + S::Nix => Nix.$method($($pname,)*), #[cfg(any(feature = "php", feature = "all-parsers"))] S::Php => Php.$method($($pname,)*), #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -1327,6 +1386,8 @@ macro_rules! execute_lang_method { S::Rust => Rust.$method($($pname,)*), #[cfg(any(feature = "scala", feature = "all-parsers"))] S::Scala => Scala.$method($($pname,)*), + #[cfg(any(feature = "solidity", feature = "all-parsers"))] + S::Solidity => Solidity.$method($($pname,)*), #[cfg(any(feature = "swift", feature = "all-parsers"))] S::Swift => Swift.$method($($pname,)*), #[cfg(any(feature = "tsx", feature = "all-parsers", feature = "tsx-napi", feature = "napi-compatible"))] @@ -1351,17 +1412,20 @@ macro_rules! execute_lang_method { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1391,17 +1455,20 @@ macro_rules! execute_lang_method { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1429,17 +1496,20 @@ macro_rules! 
impl_lang_method { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1479,17 +1549,20 @@ impl Language for SupportLang { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1533,8 +1606,10 @@ pub const fn extensions(lang: SupportLang) -> &'static [&'static str] { Elixir => &constants::ELIXIR_EXTS, #[cfg(any(feature = "go", feature = "all-parsers"))] Go => &constants::GO_EXTS, - #[cfg(feature = "haskell")] + #[cfg(any(feature = "haskell", feature = "all-parsers"))] Haskell => &constants::HASKELL_EXTS, + #[cfg(any(feature = "hcl", feature = "all-parsers"))] + Hcl => &constants::HCL_EXTS, #[cfg(any( feature = "html", feature = "all-parsers", @@ -1557,6 +1632,8 @@ pub const fn extensions(lang: SupportLang) -> &'static [&'static str] { Kotlin => &constants::KOTLIN_EXTS, #[cfg(any(feature = "lua", feature = "all-parsers"))] Lua => &constants::LUA_EXTS, + #[cfg(any(feature = "nix", feature = "all-parsers"))] + Nix => &constants::NIX_EXTS, #[cfg(any(feature = "php", feature = "all-parsers"))] Php => &constants::PHP_EXTS, #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -1567,6 +1644,8 @@ pub const fn extensions(lang: SupportLang) -> &'static [&'static str] { Rust => &constants::RUST_EXTS, #[cfg(any(feature = "scala", feature = "all-parsers"))] Scala => &constants::SCALA_EXTS, + #[cfg(any(feature = 
"solidity", feature = "all-parsers"))] + Solidity => &constants::SOLIDITY_EXTS, #[cfg(any(feature = "swift", feature = "all-parsers"))] Swift => &constants::SWIFT_EXTS, #[cfg(any( @@ -1602,17 +1681,20 @@ pub const fn extensions(lang: SupportLang) -> &'static [&'static str] { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -1764,6 +1846,7 @@ mod test { let test_cases = [ ("main.rs", Some(SupportLang::Rust)), ("app.js", Some(SupportLang::JavaScript)), + ("main.tf", Some(SupportLang::Hcl)), ("index.html", Some(SupportLang::Html)), ("data.json", Some(SupportLang::Json)), ("script.py", Some(SupportLang::Python)), @@ -1771,6 +1854,7 @@ mod test { ("style.css", Some(SupportLang::Css)), ("component.tsx", Some(SupportLang::Tsx)), ("build.gradle.kts", Some(SupportLang::Kotlin)), + ("somefile.nix", Some(SupportLang::Nix)), ("config.yml", Some(SupportLang::Yaml)), ("script.sh", Some(SupportLang::Bash)), ("app.swift", Some(SupportLang::Swift)), @@ -1778,6 +1862,7 @@ mod test { ("header.hpp", Some(SupportLang::Cpp)), ("style.scss", Some(SupportLang::Css)), ("script.rb", Some(SupportLang::Ruby)), + ("supercryptodude.sol", Some(SupportLang::Solidity)), ("main.scala", Some(SupportLang::Scala)), ("app.kt", Some(SupportLang::Kotlin)), // Case insensitive tests diff --git a/crates/language/src/lua.rs b/crates/language/src/lua.rs index cc529ae..3f27199 100644 --- a/crates/language/src/lua.rs +++ b/crates/language/src/lua.rs @@ -1,5 +1,4 @@ #![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. 
// SPDX-FileContributor: Adam Poulemanos diff --git a/crates/language/src/nix.rs b/crates/language/src/nix.rs new file mode 100644 index 0000000..68f9b7f --- /dev/null +++ b/crates/language/src/nix.rs @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#![cfg(test)] +use super::*; +use crate::test::{test_match_lang, test_replace_lang}; + +fn test_match(s1: &str, s2: &str) { + test_match_lang(s1, s2, Nix) +} + +#[test] +fn test_nix_pattern() { + test_match("$A + $B", "1 + 2"); + test_match("{ $A = $B; }", "{ foo = bar; }"); + test_match("with $A; $B", "with pkgs; hello"); + test_match("let $A = $B; in $C", "let x = 5; in x + 1"); +} + +#[test] +fn test_nix_function() { + test_match("$A: $B", "x: x + 1"); + test_match("{ $A, $B }: $C", "{ foo, bar }: foo + bar"); + test_match("{ $A ? $B }: $C", "{ x ? 
5 }: x * 2"); +} + +#[test] +fn test_nix_list() { + test_match("[ $A $B ]", "[ 1 2 ]"); + test_match("[ $$$ITEMS ]", "[ 1 2 3 4 5 ]"); +} + +#[test] +fn test_nix_string() { + test_match("\"$A\"", "\"hello\""); + test_match("''$A''", "''multi\nline''"); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + test_replace_lang(src, pattern, replacer, Nix) +} + +#[test] +fn test_nix_replace() { + let ret = test_replace("1 + 2", "$A + $B", "$B + $A"); + assert_eq!(ret, "2 + 1"); + + let ret = test_replace("{ foo = bar; }", "{ $A = $B; }", "{ $B = $A; }"); + assert_eq!(ret, "{ bar = foo; }"); + + let ret = test_replace( + "let x = 5; in x + 1", + "let $A = $B; in $C", + "let $A = $B * 2; in $C", + ); + assert_eq!(ret, "let x = 5 * 2; in x + 1"); +} diff --git a/crates/language/src/parsers.rs b/crates/language/src/parsers.rs index 2fc3af1..abcbc2d 100644 --- a/crates/language/src/parsers.rs +++ b/crates/language/src/parsers.rs @@ -64,17 +64,20 @@ feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -136,17 +139,20 @@ macro_rules! 
into_lang { feature = "elixir", feature = "go", feature = "haskell", + feature = "hcl", feature = "html", feature = "java", feature = "javascript", feature = "json", feature = "kotlin", feature = "lua", + feature = "nix", feature = "php", feature = "python", feature = "ruby", feature = "rust", feature = "scala", + feature = "solidity", feature = "swift", feature = "tsx", feature = "typescript", @@ -199,8 +205,10 @@ static CSS_LANG: OnceLock = OnceLock::new(); static ELIXIR_LANG: OnceLock = OnceLock::new(); #[cfg(any(feature = "go", feature = "all-parsers"))] static GO_LANG: OnceLock = OnceLock::new(); -#[cfg(feature = "haskell")] +#[cfg(any(feature = "haskell", feature = "all-parsers"))] static HASKELL_LANG: OnceLock = OnceLock::new(); +#[cfg(any(feature = "hcl", feature = "all-parsers"))] +static HCL_LANG: OnceLock = OnceLock::new(); #[cfg(any( feature = "html", feature = "all-parsers", @@ -223,6 +231,8 @@ static JSON_LANG: OnceLock = OnceLock::new(); static KOTLIN_LANG: OnceLock = OnceLock::new(); #[cfg(any(feature = "lua", feature = "all-parsers"))] static LUA_LANG: OnceLock = OnceLock::new(); +#[cfg(any(feature = "nix", feature = "all-parsers"))] +static NIX_LANG: OnceLock = OnceLock::new(); #[cfg(any(feature = "php", feature = "all-parsers"))] static PHP_LANG: OnceLock = OnceLock::new(); #[cfg(any(feature = "python", feature = "all-parsers"))] @@ -233,6 +243,8 @@ static RUBY_LANG: OnceLock = OnceLock::new(); static RUST_LANG: OnceLock = OnceLock::new(); #[cfg(any(feature = "scala", feature = "all-parsers"))] static SCALA_LANG: OnceLock = OnceLock::new(); +#[cfg(any(feature = "solidity", feature = "all-parsers"))] +static SOLIDITY_LANG: OnceLock = OnceLock::new(); #[cfg(any(feature = "swift", feature = "all-parsers"))] static SWIFT_LANG: OnceLock = OnceLock::new(); #[cfg(any( @@ -294,12 +306,18 @@ pub fn language_go() -> TSLanguage { GO_LANG.get_or_init(|| into_lang!(tree_sitter_go)).clone() } -#[cfg(feature = "haskell")] +#[cfg(any(feature = "haskell", feature 
= "all-parsers"))] pub fn language_haskell() -> TSLanguage { HASKELL_LANG .get_or_init(|| into_lang!(tree_sitter_haskell)) .clone() } + +#[cfg(any(feature = "hcl", feature = "all-parsers"))] +pub fn language_hcl() -> TSLanguage { + HCL_LANG.get_or_init(|| into_lang!(tree_sitter_hcl)).clone() +} + #[cfg(any( feature = "html", feature = "all-parsers", @@ -346,6 +364,12 @@ pub fn language_kotlin() -> TSLanguage { pub fn language_lua() -> TSLanguage { LUA_LANG.get_or_init(|| into_lang!(tree_sitter_lua)).clone() } + +#[cfg(any(feature = "nix", feature = "all-parsers"))] +pub fn language_nix() -> TSLanguage { + NIX_LANG.get_or_init(|| into_lang!(tree_sitter_nix)).clone() +} + #[cfg(any(feature = "php", feature = "all-parsers"))] pub fn language_php() -> TSLanguage { PHP_LANG @@ -376,6 +400,12 @@ pub fn language_scala() -> TSLanguage { .get_or_init(|| into_lang!(tree_sitter_scala)) .clone() } +#[cfg(any(feature = "solidity", feature = "all-parsers"))] +pub fn language_solidity() -> TSLanguage { + SOLIDITY_LANG + .get_or_init(|| into_lang!(tree_sitter_solidity)) + .clone() +} #[cfg(any(feature = "swift", feature = "all-parsers"))] pub fn language_swift() -> TSLanguage { SWIFT_LANG diff --git a/crates/language/src/php.rs b/crates/language/src/php.rs index 8e0982f..bcb70f3 100644 --- a/crates/language/src/php.rs +++ b/crates/language/src/php.rs @@ -1,11 +1,11 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. 
// SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#![cfg(test)] + use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/language/src/profiling.rs b/crates/language/src/profiling.rs index fff35b0..854b7f8 100644 --- a/crates/language/src/profiling.rs +++ b/crates/language/src/profiling.rs @@ -23,23 +23,22 @@ static PEAK_USAGE: AtomicUsize = AtomicUsize::new(0); unsafe impl GlobalAlloc for MemoryProfiler { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - let ptr = System.alloc(layout); + let ptr = unsafe { System.alloc(layout) }; if !ptr.is_null() { - let size = layout.size(); - let old_allocated = ALLOCATED.fetch_add(size, Ordering::Relaxed); - let current_usage = old_allocated + size - DEALLOCATED.load(Ordering::Relaxed); + ALLOCATED.fetch_add(layout.size(), Ordering::SeqCst); // Update peak usage - let mut peak = PEAK_USAGE.load(Ordering::Relaxed); - while current_usage > peak { + let current = ALLOCATED.load(Ordering::SeqCst) - DEALLOCATED.load(Ordering::SeqCst); + let mut peak = PEAK_USAGE.load(Ordering::SeqCst); + while current > peak { match PEAK_USAGE.compare_exchange_weak( peak, - current_usage, - Ordering::Relaxed, - Ordering::Relaxed, + current, + Ordering::SeqCst, + Ordering::SeqCst, ) { Ok(_) => break, - Err(x) => peak = x, + Err(actual) => peak = peak.max(actual), } } } @@ -47,8 +46,8 @@ unsafe impl GlobalAlloc for MemoryProfiler { } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { - System.dealloc(ptr, layout); - DEALLOCATED.fetch_add(layout.size(), Ordering::Relaxed); + unsafe { System.dealloc(ptr, layout) }; + DEALLOCATED.fetch_add(layout.size(), Ordering::SeqCst); } } diff --git a/crates/language/src/python.rs b/crates/language/src/python.rs index b8ea420..b9d5838 100644 --- a/crates/language/src/python.rs +++ b/crates/language/src/python.rs @@ -1,11 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme 
<2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - +#![cfg(test)] use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/language/src/ruby.rs b/crates/language/src/ruby.rs index da3d44b..ce524a9 100644 --- a/crates/language/src/ruby.rs +++ b/crates/language/src/ruby.rs @@ -1,10 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT +#![cfg(test)] use super::*; use thread_ast_engine::Pattern; diff --git a/crates/language/src/rust.rs b/crates/language/src/rust.rs index 42c0f56..c311f02 100644 --- a/crates/language/src/rust.rs +++ b/crates/language/src/rust.rs @@ -1,11 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - +#![cfg(test)] use super::*; use crate::test::{test_match_lang, test_replace_lang}; diff --git a/crates/language/src/scala.rs b/crates/language/src/scala.rs index 6cc596c..83fa219 100644 --- a/crates/language/src/scala.rs +++ b/crates/language/src/scala.rs @@ -1,4 +1,3 @@ -#![cfg(test)] // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos @@ -8,7 +7,10 @@ //! Standalone Scala file to test syntax. //! Scala does not need special processing and can be a stub lang. //! But this file is created for testing Scala2 and Scala3. - +#![cfg(test)] +//! Standalone Scala file to test syntax. +//! 
Scala does not need special processing and can be a stub lang. +//! But this file is created for testing Scala2 and Scala3. use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/language/src/solidity.rs b/crates/language/src/solidity.rs new file mode 100644 index 0000000..92fb5d3 --- /dev/null +++ b/crates/language/src/solidity.rs @@ -0,0 +1,59 @@ +#![cfg(test)] +// SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> +// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// +// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT + +use super::*; + +fn test_match(query: &str, source: &str) { + use crate::test::test_match_lang; + test_match_lang(query, source, Solidity); +} + +fn test_non_match(query: &str, source: &str) { + use crate::test::test_non_match_lang; + test_non_match_lang(query, source, Solidity); +} + +#[test] +fn test_solidity_str() { + test_match("pragma solidity 0.8.28;", "pragma solidity 0.8.28;"); + test_match( + r#"import { Test } from "forge-std/Test.sol";"#, + r#"import { Test } from "forge-std/Test.sol";"#, + ); + test_non_match("pragma solidity 0.8.28;", "pragma solidity 0.8.26;"); + test_non_match( + r#"import { Test } from "forge-std/Test.sol";"#, + r#"import { console } from "forge-std/Test.sol";"#, + ); +} + +#[test] +fn test_solidity_pattern() { + test_match( + r#"import { $A } from "forge-std/Test.sol";"#, + r#"import { Test } from "forge-std/Test.sol";"#, + ); + test_match( + r#"import { $$$ } from "forge-std/Test.sol";"#, + r#"import { Test, console } from "forge-std/Test.sol";"#, + ); +} + +fn test_replace(src: &str, pattern: &str, replacer: &str) -> String { + use crate::test::test_replace_lang; + test_replace_lang(src, pattern, replacer, Solidity) +} + +#[test] +fn test_solidity_replace() { + let ret = test_replace( + r#"import { Test } from "forge-std/Test.sol";"#, + "Test", + "console", + ); + assert_eq!(ret, r#"import { 
console } from "forge-std/Test.sol";"#); +} diff --git a/crates/language/src/swift.rs b/crates/language/src/swift.rs index 9a2a2f0..6d0e68e 100644 --- a/crates/language/src/swift.rs +++ b/crates/language/src/swift.rs @@ -1,11 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - +#![cfg(test)] use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/language/src/yaml.rs b/crates/language/src/yaml.rs index 1c02b73..d2855ae 100644 --- a/crates/language/src/yaml.rs +++ b/crates/language/src/yaml.rs @@ -1,11 +1,9 @@ -#![cfg(test)] - // SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> // SPDX-FileCopyrightText: 2025 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT - +#![cfg(test)] use super::*; fn test_match(query: &str, source: &str) { diff --git a/crates/rule-engine/Cargo.toml b/crates/rule-engine/Cargo.toml index 94708dc..003a273 100644 --- a/crates/rule-engine/Cargo.toml +++ b/crates/rule-engine/Cargo.toml @@ -17,19 +17,6 @@ include.workspace = true # [features] # we need to separate serialization, but that's a big job, and ideally rework ast-engine to allow narrower featuring - - - - - - - - - - - - - [dependencies] bit-set.workspace = true globset = "0.4.16" @@ -49,12 +36,12 @@ thread-utils = { workspace = true, default-features = false, features = [ # ast-grep-config = { version = "0.39.1" } # ast-grep-core = { version = "0.39.1", features = ["tree-sitter"] } # ast-grep-language = { version = "0.39.1", features = ["builtin-parser"] } -criterion = { version = "0.6", features = ["html_reports"] } +criterion = { version = "0.8.2", features = ["html_reports"] } thread-ast-engine = { workspace = true, features = 
["matching", "parsing"] } thread-language = { workspace = true, features = ["all-parsers"] } tree-sitter.workspace = true -tree-sitter-javascript = "0.23.1" -tree-sitter-python = "0.23.6" +tree-sitter-javascript = "0.25.0" +tree-sitter-python = "0.25.0" tree-sitter-rust = "0.24.0" tree-sitter-typescript = "0.23.2" diff --git a/crates/rule-engine/benches/ast_grep_comparison.rs b/crates/rule-engine/benches/ast_grep_comparison.rs index 3016175..b35c9a7 100644 --- a/crates/rule-engine/benches/ast_grep_comparison.rs +++ b/crates/rule-engine/benches/ast_grep_comparison.rs @@ -1,24 +1,17 @@ -// SPDX-FileCopyrightText: 2025 Knitli Inc. +// SPDX-FileCopyrightText: 2026 Knitli Inc. // SPDX-FileContributor: Adam Poulemanos // SPDX-License-Identifier: MIT OR Apache-2.0 use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use std::hint::black_box; -// Thread imports -use thread_language::{LanguageExt as ThreadLanguageExt, SupportLang as ThreadSupportLang}; +use thread_ast_engine::tree_sitter::LanguageExt as ThreadLanguageExt; +use thread_language::SupportLang as ThreadSupportLang; use thread_rule_engine::{ CombinedScan as ThreadCombinedScan, GlobalRules as ThreadGlobalRules, from_yaml_string as thread_from_yaml_string, }; -// AstGrep imports -use ast_grep_config::{ - CombinedScan as AstGrepCombinedScan, GlobalRules as AstGrepGlobalRules, - from_yaml_string as ast_grep_from_yaml_string, -}; -use ast_grep_language::{LanguageExt as AstGrepLanguageExt, SupportLang as AstGrepSupportLang}; - struct ComparisonData { rules: Vec<&'static str>, test_code: &'static str, @@ -43,19 +36,19 @@ severity: info language: TypeScript rule: pattern: function $F($$$) { $$$ } -"#, /* - r#" -id: class-with-constructor -message: found class with constructor -severity: info -language: TypeScript -rule: - all: - - pattern: class $C { $$$ } - - has: - pattern: constructor_type($$$) { $$$ } - stopBy: end -"#,*/ +"#, + // r#" + // id: class-with-constructor + // message: found class 
with constructor + // severity: info + // language: TypeScript + // rule: + // all: + // - pattern: class $C { $$$ } + // - has: + // pattern: constructor_type($$$) { $$$ } + // stopBy: end + // "#, r#" id: import-statement message: found import statement @@ -103,20 +96,6 @@ fn bench_rule_parsing_comparison(c: &mut Criterion) { }); }, ); - - // Benchmark ast-grep-config - group.bench_with_input( - BenchmarkId::new("ast_grep_config", rule_idx), - rule_yaml, - |b, yaml| { - let globals = AstGrepGlobalRules::default(); - b.iter(|| { - let _rules = - ast_grep_from_yaml_string::(black_box(yaml), &globals) - .expect("should parse"); - }); - }, - ); } group.finish(); @@ -135,20 +114,11 @@ rule: pattern: console.log($A) "#; - // Prepare rules for both libraries let thread_globals = ThreadGlobalRules::default(); - let ast_grep_globals = AstGrepGlobalRules::default(); - let thread_rules = thread_from_yaml_string::(test_rule, &thread_globals) .expect("should parse"); - let ast_grep_rules = - ast_grep_from_yaml_string::(test_rule, &ast_grep_globals) - .expect("should parse"); - let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); - let ast_grep_grep = AstGrepSupportLang::TypeScript.ast_grep(data.test_code); - // Benchmark thread-rule-engine group.bench_function("thread_rule_engine", |b| { b.iter(|| { let matches: Vec<_> = thread_grep @@ -159,17 +129,6 @@ rule: }); }); - // Benchmark ast-grep-config - group.bench_function("ast_grep_config", |b| { - b.iter(|| { - let matches: Vec<_> = ast_grep_grep - .root() - .find_all(&ast_grep_rules[0].matcher) - .collect(); - black_box(matches); - }); - }); - group.finish(); } @@ -177,12 +136,8 @@ fn bench_combined_scan_comparison(c: &mut Criterion) { let data = ComparisonData::new(); let mut group = c.benchmark_group("combined_scan_comparison"); - // Prepare rules for both libraries let thread_globals = ThreadGlobalRules::default(); - let ast_grep_globals = AstGrepGlobalRules::default(); - let mut thread_rules = 
Vec::new(); - let mut ast_grep_rules = Vec::new(); for rule_yaml in &data.rules { let thread_rule = thread_from_yaml_string::(rule_yaml, &thread_globals) @@ -190,28 +145,13 @@ fn bench_combined_scan_comparison(c: &mut Criterion) { .into_iter() .next() .unwrap(); - let ast_grep_rule = - ast_grep_from_yaml_string::(rule_yaml, &ast_grep_globals) - .expect("should parse") - .into_iter() - .next() - .unwrap(); - thread_rules.push(thread_rule); - ast_grep_rules.push(ast_grep_rule); } - // Create combined scanners let thread_rule_refs: Vec<_> = thread_rules.iter().collect(); - let ast_grep_rule_refs: Vec<_> = ast_grep_rules.iter().collect(); - let thread_combined_scan = ThreadCombinedScan::new(thread_rule_refs); - let ast_grep_combined_scan = AstGrepCombinedScan::new(ast_grep_rule_refs); - let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); - let ast_grep_grep = AstGrepSupportLang::TypeScript.ast_grep(data.test_code); - // Benchmark thread-rule-engine group.bench_function("thread_rule_engine", |b| { b.iter(|| { let result = thread_combined_scan.scan(black_box(&thread_grep), false); @@ -219,14 +159,6 @@ fn bench_combined_scan_comparison(c: &mut Criterion) { }); }); - // Benchmark ast-grep-config - group.bench_function("ast_grep_config", |b| { - b.iter(|| { - let result = ast_grep_combined_scan.scan(black_box(&ast_grep_grep), false); - black_box(result); - }); - }); - group.finish(); } @@ -251,22 +183,6 @@ fn bench_memory_usage_comparison(c: &mut Criterion) { }); }); - group.bench_function("ast_grep_config_memory", |b| { - let globals = AstGrepGlobalRules::default(); - b.iter(|| { - let mut rules = Vec::new(); - for rule_yaml in &data.rules { - let rule = ast_grep_from_yaml_string::(rule_yaml, &globals) - .expect("should parse") - .into_iter() - .next() - .unwrap(); - rules.push(rule); - } - black_box(rules); - }); - }); - group.finish(); } diff --git a/crates/rule-engine/benches/comparison_benchmarks.rs 
b/crates/rule-engine/benches/comparison_benchmarks.rs index 91ecb07..59f4aac 100644 --- a/crates/rule-engine/benches/comparison_benchmarks.rs +++ b/crates/rule-engine/benches/comparison_benchmarks.rs @@ -5,20 +5,13 @@ use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use std::hint::black_box; -use thread_language::{LanguageExt as ThreadLanguageExt, SupportLang as ThreadSupportLang}; - -use ast_grep_language::{LanguageExt as AstGrepLanguageExt, SupportLang as AstGrepSupportLang}; - +use thread_ast_engine::tree_sitter::LanguageExt as ThreadLanguageExt; +use thread_language::SupportLang as ThreadSupportLang; use thread_rule_engine::{ CombinedScan as ThreadCombinedScan, GlobalRules as ThreadGlobalRules, from_yaml_string as thread_from_yaml_string, }; -use ast_grep_config::{ - CombinedScan as AstGrepCombinedScan, GlobalRules as AstGrepGlobalRules, - from_yaml_string as ast_grep_from_yaml_string, -}; - struct ComparisonData { rules: Vec<&'static str>, test_code: &'static str, @@ -80,22 +73,6 @@ fn bench_rule_parsing_comparison(c: &mut Criterion) { }); }, ); - - // Benchmark ast-grep-config - group.bench_with_input( - BenchmarkId::new("ast_grep_config", rule_idx), - rule_yaml, - |b, yaml| { - let globals = AstGrepGlobalRules::default(); - b.iter(|| { - let _rules = ast_grep_from_yaml_string::( - black_box(yaml), - &globals, - ) - .expect("should parse"); - }); - }, - ); } group.finish(); @@ -114,27 +91,14 @@ rule: pattern: console.log($A) "#; - // Prepare rules for both libraries let thread_globals = ThreadGlobalRules::default(); - let ast_grep_globals = AstGrepGlobalRules::default(); let thread_rule = thread_from_yaml_string::(test_rule, &thread_globals) .expect("should parse")[0] .clone(); - let ast_grep_rule_config = - ast_grep_from_yaml_string::(test_rule, &ast_grep_globals) - .expect("should parse")[0] - .clone(); - - // Convert the config to a RuleCore to get the matcher - let ast_grep_rule = - 
ast_grep_config::RuleConfig::try_from(ast_grep_rule_config, &ast_grep_globals) - .expect("should convert to RuleCore"); let thread_grep = ThreadSupportLang::TypeScript.ast_grep(data.test_code); - let ast_grep_grep = AstGrepSupportLang::TypeScript.ast_grep(data.test_code); - // Benchmark thread-rule-engine group.bench_function("thread_rule_engine", |b| { b.iter(|| { let matches: Vec<_> = Vec::from_iter(thread_grep.root().find_all(&thread_rule.matcher)); @@ -142,16 +106,6 @@ rule: }); }); - // Benchmark ast-grep-config - group.bench_function("ast_grep_config", |b| { - b.iter(|| { - // Use the same matcher as in thread_rule_engine - let matches: Vec<_> = - Vec::from_iter(ast_grep_grep.root().find_all(&ast_grep_rule.matcher)); - black_box(matches); - }); - }); - group.finish(); } @@ -159,12 +113,9 @@ fn bench_combined_scan_comparison(c: &mut Criterion) { let data = ComparisonData::new(); let mut group = c.benchmark_group("combined_scan_comparison"); - // Prepare rules for both libraries let thread_globals = ThreadGlobalRules::default(); - let ast_grep_globals = AstGrepGlobalRules::default(); let mut thread_rules = Vec::new(); - let mut ast_grep_rules = Vec::new(); for rule_yaml in &data.rules { let thread_rule = thread_from_yaml_string::(rule_yaml, &thread_globals) @@ -172,30 +123,13 @@ fn bench_combined_scan_comparison(c: &mut Criterion) { .into_iter() .next() .unwrap(); - let ast_grep_rule = ast_grep_from_yaml_string::( - rule_yaml, - &ast_grep_globals, - ) - .expect("should parse") - .into_iter() - .next() - .unwrap(); - thread_rules.push(thread_rule); - ast_grep_rules.push(ast_grep_rule); } - // Create combined scanners let thread_rule_refs: Vec<_> = thread_rules.iter().collect(); - let ast_grep_rule_refs: Vec<_> = ast_grep_rules.iter().collect(); - let thread_combined_scan = ThreadCombinedScan::new(thread_rule_refs); - let ast_grep_combined_scan = AstGrepCombinedScan::new(ast_grep_rule_refs); - let thread_grep = 
ThreadSupportLang::TypeScript.ast_grep(data.test_code); - let ast_grep_grep = ast_grep_language::SupportLang::TypeScript.ast_grep(data.test_code); - // Benchmark thread-rule-engine group.bench_function("thread_rule_engine", |b| { b.iter(|| { let result = thread_combined_scan.scan(black_box(&thread_grep), false); @@ -203,14 +137,6 @@ fn bench_combined_scan_comparison(c: &mut Criterion) { }); }); - // Benchmark ast-grep-config - group.bench_function("ast_grep_config", |b| { - b.iter(|| { - let result = ast_grep_combined_scan.scan(black_box(&ast_grep_grep), false); - black_box(result); - }); - }); - group.finish(); } @@ -224,29 +150,11 @@ fn bench_memory_usage_comparison(c: &mut Criterion) { b.iter(|| { let mut rules = Vec::new(); for rule_yaml in &data.rules { - let rule = - thread_from_yaml_string::(rule_yaml, &globals) - .expect("should parse") - .into_iter() - .next() - .unwrap(); - rules.push(rule); - } - black_box(rules); - }); - }); - - group.bench_function("ast_grep_config_memory", |b| { - let globals = AstGrepGlobalRules::default(); - b.iter(|| { - let mut rules = Vec::new(); - for rule_yaml in &data.rules { - let rule = - ast_grep_from_yaml_string::(rule_yaml, &globals) - .expect("should parse") - .into_iter() - .next() - .unwrap(); + let rule = thread_from_yaml_string::(rule_yaml, &globals) + .expect("should parse") + .into_iter() + .next() + .unwrap(); rules.push(rule); } black_box(rules); diff --git a/crates/rule-engine/benches/rule_engine_benchmarks.rs b/crates/rule-engine/benches/rule_engine_benchmarks.rs index 7bed1ba..85e3be5 100644 --- a/crates/rule-engine/benches/rule_engine_benchmarks.rs +++ b/crates/rule-engine/benches/rule_engine_benchmarks.rs @@ -5,8 +5,8 @@ use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use std::hint::black_box; -use thread_language::{LanguageExt, SupportLang}; - +use thread_ast_engine::tree_sitter::LanguageExt; +use thread_language::SupportLang; use thread_rule_engine::{CombinedScan, GlobalRules, 
RuleCollection, from_yaml_string}; pub type BenchLanguage = SupportLang; diff --git a/crates/rule-engine/serialization_analysis/analyze_serialization.rs b/crates/rule-engine/serialization_analysis/analyze_serialization.rs index 0d49bcb..345691a 100644 --- a/crates/rule-engine/serialization_analysis/analyze_serialization.rs +++ b/crates/rule-engine/serialization_analysis/analyze_serialization.rs @@ -34,7 +34,6 @@ pub enum DependencyType { DeserializationCall, YamlCall, - // Schema generation JsonSchemaUsage, SchemaGeneration, @@ -55,22 +54,23 @@ pub enum DependencyType { impl DependencyType { pub fn category(&self) -> &'static str { match self { - DependencyType::SerdeDerive | - DependencyType::SerdeImport | - DependencyType::SerializableType | - DependencyType::SerdeAttribute => "Core Serialization", + DependencyType::SerdeDerive + | DependencyType::SerdeImport + | DependencyType::SerializableType + | DependencyType::SerdeAttribute => "Core Serialization", - DependencyType::SerializationCall | - DependencyType::DeserializationCall | - DependencyType::YamlCall => "Serialization Operations", + DependencyType::SerializationCall + | DependencyType::DeserializationCall + | DependencyType::YamlCall => "Serialization Operations", - DependencyType::JsonSchemaUsage | - DependencyType::SchemaGeneration => "Schema Generation", + DependencyType::JsonSchemaUsage | DependencyType::SchemaGeneration => { + "Schema Generation" + } - DependencyType::DeserializeEnvUsage | - DependencyType::MaybeWrapper | - DependencyType::TransformFunction | - DependencyType::ConfigCreation => "Crate-Specific Serialization", + DependencyType::DeserializeEnvUsage + | DependencyType::MaybeWrapper + | DependencyType::TransformFunction + | DependencyType::ConfigCreation => "Crate-Specific Serialization", DependencyType::SerializationError => "Error Handling", } @@ -79,33 +79,33 @@ impl DependencyType { pub fn severity(&self) -> SerializationSeverity { match self { // High impact - these are fundamental 
to serialization - DependencyType::SerdeDerive | - DependencyType::SerializableType | - DependencyType::DeserializeEnvUsage => SerializationSeverity::High, + DependencyType::SerdeDerive + | DependencyType::SerializableType + | DependencyType::DeserializeEnvUsage => SerializationSeverity::High, // Medium impact - important but could potentially be abstracted - DependencyType::SerializationCall | - DependencyType::DeserializationCall | - DependencyType::YamlCall | - DependencyType::JsonSchemaUsage | - DependencyType::TransformFunction | - DependencyType::ConfigCreation => SerializationSeverity::Medium, + DependencyType::SerializationCall + | DependencyType::DeserializationCall + | DependencyType::YamlCall + | DependencyType::JsonSchemaUsage + | DependencyType::TransformFunction + | DependencyType::ConfigCreation => SerializationSeverity::Medium, // Low impact - imports and attributes that could be feature-gated - DependencyType::SerdeImport | - DependencyType::SerdeAttribute | - DependencyType::SchemaGeneration | - DependencyType::MaybeWrapper | - DependencyType::SerializationError => SerializationSeverity::Low, + DependencyType::SerdeImport + | DependencyType::SerdeAttribute + | DependencyType::SchemaGeneration + | DependencyType::MaybeWrapper + | DependencyType::SerializationError => SerializationSeverity::Low, } } } #[derive(Debug, Clone, PartialEq, Eq)] pub enum SerializationSeverity { - High, // Core to serialization, hard to separate - Medium, // Important but could be abstracted - Low, // Can be feature-gated or easily separated + High, // Core to serialization, hard to separate + Medium, // Important but could be abstracted + Low, // Can be feature-gated or easily separated } #[derive(Debug)] @@ -206,11 +206,16 @@ impl SerializationAnalysisReport { } } - fn detect_serialization_dependency(line: &str, line_number: usize) -> Option { + fn detect_serialization_dependency( + line: &str, + line_number: usize, + ) -> Option { let line = line.trim(); // Check for 
various serialization patterns - if line.contains("#[derive(") && (line.contains("Serialize") || line.contains("Deserialize")) { + if line.contains("#[derive(") + && (line.contains("Serialize") || line.contains("Deserialize")) + { return Some(SerializationDependency { file_path: String::new(), // Will be set by caller line_number, @@ -220,7 +225,11 @@ impl SerializationAnalysisReport { }); } - if line.starts_with("use serde") || line.contains("use serde_yaml") || line.contains("use serde_json") || line.contains("use schemars") { + if line.starts_with("use serde") + || line.contains("use serde_yaml") + || line.contains("use serde_json") + || line.contains("use schemars") + { return Some(SerializationDependency { file_path: String::new(), line_number, @@ -230,12 +239,20 @@ impl SerializationAnalysisReport { }); } - if line.contains("deserialize(") || line.contains("serialize(") || line.contains("yaml::") || line.contains("serde_yaml::") || line.contains("from_yaml_string") { + if line.contains("deserialize(") + || line.contains("serialize(") + || line.contains("yaml::") + || line.contains("serde_yaml::") + || line.contains("from_yaml_string") + { let dep_type = if line.contains("deserialize(") { DependencyType::DeserializationCall } else if line.contains("serialize(") { DependencyType::SerializationCall - } else if line.contains("yaml::") || line.contains("serde_yaml::") || line.contains("from_yaml_string") { + } else if line.contains("yaml::") + || line.contains("serde_yaml::") + || line.contains("from_yaml_string") + { DependencyType::YamlCall }; @@ -285,24 +302,33 @@ impl SerializationAnalysisReport { let line = line.trim(); // Look for core functionality patterns - if line.starts_with("impl Matcher") || - line.starts_with("impl Pattern") || - line.starts_with("impl Rule") || - line.starts_with("impl<") && line.contains("RuleMatcher") || - line.starts_with("impl<") && line.contains("Matcher") || - line.starts_with("fn match_node") || - line.starts_with("fn 
potential_kinds") || - line.contains("find(") || - line.contains("ast_grep(") { + if line.starts_with("impl Matcher") + || line.starts_with("impl Pattern") + || line.starts_with("impl Rule") + || line.starts_with("impl<") && line.contains("RuleMatcher") + || line.starts_with("impl<") && line.contains("Matcher") + || line.starts_with("fn match_node") + || line.starts_with("fn potential_kinds") + || line.contains("find(") + || line.contains("ast_grep(") + { return true; } false } - fn assess_separation_difficulty(dependencies: &[SerializationDependency]) -> SerializationSeverity { - let high_count = dependencies.iter().filter(|d| d.dependency_type.severity() == SerializationSeverity::High).count(); - let medium_count = dependencies.iter().filter(|d| d.dependency_type.severity() == SerializationSeverity::Medium).count(); + fn assess_separation_difficulty( + dependencies: &[SerializationDependency], + ) -> SerializationSeverity { + let high_count = dependencies + .iter() + .filter(|d| d.dependency_type.severity() == SerializationSeverity::High) + .count(); + let medium_count = dependencies + .iter() + .filter(|d| d.dependency_type.severity() == SerializationSeverity::Medium) + .count(); if high_count > 5 { SerializationSeverity::High @@ -321,8 +347,14 @@ impl SerializationAnalysisReport { // Update summaries for dep in &file_analysis.dependencies { - *self.dependency_summary.entry(dep.dependency_type.clone()).or_insert(0) += 1; - *self.category_summary.entry(dep.dependency_type.category().to_string()).or_insert(0) += 1; + *self + .dependency_summary + .entry(dep.dependency_type.clone()) + .or_insert(0) += 1; + *self + .category_summary + .entry(dep.dependency_type.category().to_string()) + .or_insert(0) += 1; } // Track high-impact files @@ -338,28 +370,40 @@ impl SerializationAnalysisReport { match file.separation_difficulty { SerializationSeverity::Low => { if file.serialization_density > 50.0 { - 
self.separation_strategy.serialization_only_files.push(file.file_path.clone()); + self.separation_strategy + .serialization_only_files + .push(file.file_path.clone()); } else { - self.separation_strategy.feature_gate_candidates.push(file.file_path.clone()); + self.separation_strategy + .feature_gate_candidates + .push(file.file_path.clone()); } } SerializationSeverity::Medium => { if file.core_functionality.len() > file.dependencies.len() { - self.separation_strategy.abstraction_layer_needed.push(file.file_path.clone()); + self.separation_strategy + .abstraction_layer_needed + .push(file.file_path.clone()); } else { - self.separation_strategy.mixed_responsibility_files.push(file.file_path.clone()); + self.separation_strategy + .mixed_responsibility_files + .push(file.file_path.clone()); } } SerializationSeverity::High => { if !file.core_functionality.is_empty() { - self.separation_strategy.mixed_responsibility_files.push(file.file_path.clone()); + self.separation_strategy + .mixed_responsibility_files + .push(file.file_path.clone()); } } } // Identify files with primarily core logic if file.serialization_density < 25.0 && !file.core_functionality.is_empty() { - self.separation_strategy.core_logic_files.push(file.file_path.clone()); + self.separation_strategy + .core_logic_files + .push(file.file_path.clone()); } } } @@ -370,10 +414,18 @@ impl SerializationAnalysisReport { report.push_str("# SERIALIZATION DEPENDENCY ANALYSIS REPORT\n\n"); report.push_str("## Executive Summary\n\n"); - report.push_str(&format!("- **Total files analyzed**: {}\n", self.files.len())); - report.push_str(&format!("- **High-impact files**: {}\n", self.high_impact_files.len())); - report.push_str(&format!("- **Total serialization dependencies**: {}\n", - self.dependency_summary.values().sum::())); + report.push_str(&format!( + "- **Total files analyzed**: {}\n", + self.files.len() + )); + report.push_str(&format!( + "- **High-impact files**: {}\n", + self.high_impact_files.len() + )); + 
report.push_str(&format!( + "- **Total serialization dependencies**: {}\n", + self.dependency_summary.values().sum::() + )); report.push_str("\n## Dependency Categories\n\n"); for (category, count) in &self.category_summary { @@ -382,17 +434,30 @@ impl SerializationAnalysisReport { report.push_str("\n## Detailed Dependency Breakdown\n\n"); for (dep_type, count) in &self.dependency_summary { - report.push_str(&format!("- **{:?}**: {} ({})\n", - dep_type, count, dep_type.category())); + report.push_str(&format!( + "- **{:?}**: {} ({})\n", + dep_type, + count, + dep_type.category() + )); } report.push_str("\n## High-Impact Files (Difficult to Separate)\n\n"); for file in &self.high_impact_files { if let Some(analysis) = self.files.iter().find(|f| f.file_path == *file) { report.push_str(&format!("### {}\n", file)); - report.push_str(&format!("- Serialization density: {:.1}%\n", analysis.serialization_density)); - report.push_str(&format!("- Dependencies: {}\n", analysis.dependencies.len())); - report.push_str(&format!("- Core functions: {}\n\n", analysis.core_functionality.len())); + report.push_str(&format!( + "- Serialization density: {:.1}%\n", + analysis.serialization_density + )); + report.push_str(&format!( + "- Dependencies: {}\n", + analysis.dependencies.len() + )); + report.push_str(&format!( + "- Core functions: {}\n\n", + analysis.core_functionality.len() + )); } } @@ -424,10 +489,14 @@ impl SerializationAnalysisReport { } report.push_str("\n## RECOMMENDATIONS\n\n"); - report.push_str("1. **Immediate actions**: Feature-gate files with low serialization impact\n"); + report.push_str( + "1. **Immediate actions**: Feature-gate files with low serialization impact\n", + ); report.push_str("2. **Short-term**: Create abstraction layer for files needing it\n"); report.push_str("3. **Medium-term**: Refactor mixed responsibility files\n"); - report.push_str("4. **Long-term**: Consider trait-based abstraction for core serialization needs\n"); + report.push_str( + "4. 
**Long-term**: Consider trait-based abstraction for core serialization needs\n", + ); report } @@ -440,15 +509,30 @@ mod tests { #[test] fn test_dependency_categorization() { assert_eq!(DependencyType::SerdeDerive.category(), "Core Serialization"); - assert_eq!(DependencyType::SerializationCall.category(), "Serialization Operations"); - assert_eq!(DependencyType::JsonSchemaUsage.category(), "Schema Generation"); + assert_eq!( + DependencyType::SerializationCall.category(), + "Serialization Operations" + ); + assert_eq!( + DependencyType::JsonSchemaUsage.category(), + "Schema Generation" + ); } #[test] fn test_severity_assessment() { - assert_eq!(DependencyType::SerdeDerive.severity(), SerializationSeverity::High); - assert_eq!(DependencyType::SerdeImport.severity(), SerializationSeverity::Low); - assert_eq!(DependencyType::SerializationCall.severity(), SerializationSeverity::Medium); + assert_eq!( + DependencyType::SerdeDerive.severity(), + SerializationSeverity::High + ); + assert_eq!( + DependencyType::SerdeImport.severity(), + SerializationSeverity::Low + ); + assert_eq!( + DependencyType::SerializationCall.severity(), + SerializationSeverity::Medium + ); } } diff --git a/crates/rule-engine/src/combined.rs b/crates/rule-engine/src/combined.rs index 099350c..e1a85d0 100644 --- a/crates/rule-engine/src/combined.rs +++ b/crates/rule-engine/src/combined.rs @@ -267,10 +267,10 @@ impl<'r, L: Language> CombinedScan<'r, L> { }; let (suppressions, mut suppression_nodes) = Suppressions::collect_all(root); let file_sup = suppressions.file_suppression(); - if let MaySuppressed::Yes(s) = file_sup { - if s.suppressed.is_none() { - return result.into_result(self, separate_fix); - } + if let MaySuppressed::Yes(s) = file_sup + && s.suppressed.is_none() + { + return result.into_result(self, separate_fix); } for node in root.root().dfs() { let kind = node.kind_id() as usize; diff --git a/crates/rule-engine/src/rule/referent_rule.rs b/crates/rule-engine/src/rule/referent_rule.rs 
index 78c419d..ab9794c 100644 --- a/crates/rule-engine/src/rule/referent_rule.rs +++ b/crates/rule-engine/src/rule/referent_rule.rs @@ -84,12 +84,14 @@ impl RuleRegistration { } pub(crate) fn insert_local(&self, id: &str, rule: Rule) -> Result<(), ReferentRuleError> { + if rule.check_cyclic(id) { + return Err(ReferentRuleError::CyclicRule(id.into())); + } let map = self.local.write(); if map.contains_key(id) { return Err(ReferentRuleError::DuplicateRule(id.into())); } map.insert(id.to_string(), rule); - let _rule = map.get(id).unwrap(); Ok(()) } diff --git a/crates/rule-engine/src/rule_collection.rs b/crates/rule-engine/src/rule_collection.rs index 7c0920c..b88856e 100644 --- a/crates/rule-engine/src/rule_collection.rs +++ b/crates/rule-engine/src/rule_collection.rs @@ -60,10 +60,10 @@ where impl ContingentRule { pub fn matches_path>(&self, path: P) -> bool { - if let Some(ignore_globs) = &self.ignore_globs { - if ignore_globs.is_match(&path) { - return false; - } + if let Some(ignore_globs) = &self.ignore_globs + && ignore_globs.is_match(&path) + { + return false; } if let Some(files_globs) = &self.files_globs { return files_globs.is_match(path); diff --git a/crates/rule-engine/src/rule_core.rs b/crates/rule-engine/src/rule_core.rs index ea20669..e09a832 100644 --- a/crates/rule-engine/src/rule_core.rs +++ b/crates/rule-engine/src/rule_core.rs @@ -232,10 +232,10 @@ impl RuleCore { env: &mut Cow>, enclosing_env: Option<&MetaVarEnv<'tree, D>>, ) -> Option> { - if let Some(kinds) = &self.kinds { - if !kinds.contains(node.kind_id().into()) { - return None; - } + if let Some(kinds) = &self.kinds + && !kinds.contains(node.kind_id().into()) + { + return None; } let ret = self.rule.match_node_with_env(node, env)?; if !env.to_mut().match_constraints(&self.constraints) { diff --git a/crates/rule-engine/src/transform/string_case.rs b/crates/rule-engine/src/transform/string_case.rs index 23285b2..3e688f4 100644 --- a/crates/rule-engine/src/transform/string_case.rs +++ 
b/crates/rule-engine/src/transform/string_case.rs @@ -137,15 +137,15 @@ impl Delimiter { } // case 2, consecutive UpperCases followed by lowercase // e.g. XMLHttp -> XML Http - if let MultiUpper(last_char) = state { - if c.is_lowercase() { - let new_left = *right - last_char.len_utf8(); - let range = *left..new_left; - *left = new_left; - *right += c.len_utf8(); - self.state = Lower; - return Some(range); - } + if let MultiUpper(last_char) = state + && c.is_lowercase() + { + let new_left = *right - last_char.len_utf8(); + let range = *left..new_left; + *left = new_left; + *right += c.len_utf8(); + self.state = Lower; + return Some(range); } *right += c.len_utf8(); if *state == CaseState::IgnoreCase { @@ -183,10 +183,10 @@ fn split<'a>(s: &'a str, seps: Option<&[Separator]>) -> impl Iterator( /// Extract function definitions using ast-grep patterns #[cfg(feature = "matching")] -fn extract_functions(root_node: &Node) -> ServiceResult> { - let mut functions = HashMap::new(); +fn extract_functions(root_node: &Node) -> ServiceResult> { + let mut functions = thread_utils::get_map(); // Try different function patterns based on common languages let patterns = [ @@ -122,8 +122,8 @@ fn extract_functions(root_node: &Node) -> ServiceResult( root_node: &Node, language: &SupportLang, -) -> ServiceResult> { - let mut imports = HashMap::new(); +) -> ServiceResult> { + let mut imports = thread_utils::get_map(); let patterns = match language { SupportLang::Rust => vec!["use $PATH;", "use $PATH::$ITEM;", "use $PATH::{$$$ITEMS};"], diff --git a/crates/services/src/error.rs b/crates/services/src/error.rs index dd5d014..e74d52c 100644 --- a/crates/services/src/error.rs +++ b/crates/services/src/error.rs @@ -282,7 +282,7 @@ pub struct ErrorContext { pub operation: Option, /// Additional context data - pub context_data: std::collections::HashMap, + pub context_data: thread_utils::RapidMap, } impl ErrorContext { diff --git a/crates/services/src/facade.rs b/crates/services/src/facade.rs 
index 52b9dc5..db0f691 100644 --- a/crates/services/src/facade.rs +++ b/crates/services/src/facade.rs @@ -22,6 +22,7 @@ use std::sync::Arc; pub struct ThreadService, D: crate::types::Doc + Send + Sync> { #[allow(dead_code)] analyzer: Arc, + #[allow(dead_code)] #[cfg(feature = "storage-traits")] storage: Option>, _marker: std::marker::PhantomData, diff --git a/crates/services/src/lib.rs b/crates/services/src/lib.rs index 71bf669..e0afb29 100644 --- a/crates/services/src/lib.rs +++ b/crates/services/src/lib.rs @@ -168,10 +168,10 @@ impl ExecutionContext for FileSystemContext { let mut sources = Vec::new(); for entry in std::fs::read_dir(&self.base_path)? { let entry = entry?; - if entry.file_type()?.is_file() { - if let Some(name) = entry.file_name().to_str() { - sources.push(name.to_string()); - } + if entry.file_type()?.is_file() + && let Some(name) = entry.file_name().to_str() + { + sources.push(name.to_string()); } } Ok(sources) diff --git a/crates/services/src/traits/analyzer.rs b/crates/services/src/traits/analyzer.rs index e0808b3..be02249 100644 --- a/crates/services/src/traits/analyzer.rs +++ b/crates/services/src/traits/analyzer.rs @@ -9,7 +9,7 @@ use crate::types::Doc; use async_trait::async_trait; -use std::collections::HashMap; +use thread_utils::RapidMap; use crate::error::{AnalysisError, ServiceResult}; use crate::types::{AnalysisContext, CodeMatch, CrossFileRelationship, ParsedDocument}; @@ -246,14 +246,15 @@ pub trait CodeAnalyzer: Send + Sync { if ch == '$' { _found_metavar = true; // Next character should be alphabetic or underscore - if let Some(next_ch) = chars.next() { - if !next_ch.is_alphabetic() && next_ch != '_' { - return Err(AnalysisError::MetaVariable { - variable: format!("${}", next_ch), - message: "Invalid meta-variable format".to_string(), - } - .into()); + if let Some(next_ch) = chars.next() + && !next_ch.is_alphabetic() + && next_ch != '_' + { + return Err(AnalysisError::MetaVariable { + variable: format!("${}", next_ch), + message: 
"Invalid meta-variable format".to_string(), } + .into()); } } } @@ -337,7 +338,7 @@ pub struct AnalyzerCapabilities { pub performance_profile: AnalysisPerformanceProfile, /// Additional capability flags - pub capability_flags: HashMap, + pub capability_flags: RapidMap, } impl Default for AnalyzerCapabilities { @@ -351,7 +352,7 @@ impl Default for AnalyzerCapabilities { supports_incremental_analysis: false, supported_analysis_depths: vec![AnalysisDepth::Syntax, AnalysisDepth::Local], performance_profile: AnalysisPerformanceProfile::Balanced, - capability_flags: HashMap::new(), + capability_flags: thread_utils::get_map(), } } } @@ -409,7 +410,7 @@ pub struct AnalysisConfig { pub performance_profile: Option, /// Custom configuration options - pub custom_options: HashMap, + pub custom_options: RapidMap, } impl Default for AnalysisConfig { @@ -419,7 +420,7 @@ impl Default for AnalysisConfig { collect_relationships: false, enable_pattern_caching: true, performance_profile: None, // Auto-detect - custom_options: HashMap::new(), + custom_options: thread_utils::get_map(), } } } diff --git a/crates/services/src/traits/parser.rs b/crates/services/src/traits/parser.rs index 956ec42..0d2555b 100644 --- a/crates/services/src/traits/parser.rs +++ b/crates/services/src/traits/parser.rs @@ -8,8 +8,8 @@ //! functionality while preserving all its capabilities. 
use async_trait::async_trait; -use std::collections::HashMap; use std::path::Path; +use thread_utils::RapidMap; use crate::error::{ParseError, ServiceResult}; use crate::types::{AnalysisContext, ParsedDocument}; @@ -202,14 +202,14 @@ pub trait CodeParser: Send + Sync { // Check content size limits based on capabilities let capabilities = self.capabilities(); - if let Some(max_size) = capabilities.max_content_size { - if content.len() > max_size { - return Err(ParseError::ContentTooLarge { - size: content.len(), - max_size, - } - .into()); + if let Some(max_size) = capabilities.max_content_size + && content.len() > max_size + { + return Err(ParseError::ContentTooLarge { + size: content.len(), + max_size, } + .into()); } Ok(()) @@ -280,7 +280,7 @@ pub struct ParserCapabilities { pub performance_profile: PerformanceProfile, /// Additional capability flags - pub capability_flags: HashMap, + pub capability_flags: RapidMap, } impl Default for ParserCapabilities { @@ -294,7 +294,7 @@ impl Default for ParserCapabilities { supports_metadata_collection: true, supports_cross_file_analysis: false, performance_profile: PerformanceProfile::Balanced, - capability_flags: HashMap::new(), + capability_flags: thread_utils::get_map(), } } } @@ -338,7 +338,7 @@ pub struct ParserConfig { pub execution_strategy: Option, /// Custom configuration options - pub custom_options: HashMap, + pub custom_options: RapidMap, } impl Default for ParserConfig { @@ -347,7 +347,7 @@ impl Default for ParserConfig { collect_metadata: true, enable_error_recovery: true, execution_strategy: None, // Auto-detect - custom_options: HashMap::new(), + custom_options: thread_utils::get_map(), } } } diff --git a/crates/services/src/traits/storage.rs b/crates/services/src/traits/storage.rs index dc70693..605d735 100644 --- a/crates/services/src/traits/storage.rs +++ b/crates/services/src/traits/storage.rs @@ -9,10 +9,10 @@ //! implementations are commercial-only features. 
use async_trait::async_trait; -use std::collections::HashMap; use std::time::{Duration, SystemTime}; +use thread_utils::RapidMap; -use crate::error::{ServiceResult, StorageError}; +use crate::error::ServiceResult; use crate::types::{AnalysisContext, CrossFileRelationship, ParsedDocument}; use thread_ast_engine::source::Doc; @@ -52,7 +52,7 @@ use thread_ast_engine::source::Doc; /// let storage: Box = Box::new( /// PostgresStorageService::new("connection_string").await.unwrap() /// ); -/// +/// /// // Store analysis results persistently /// // storage.store_analysis_result(...).await.unwrap(); /// } @@ -69,7 +69,9 @@ pub trait StorageService: Send + Sync { key: &AnalysisKey, result: &AnalysisResult, context: &AnalysisContext, - ) -> ServiceResult<()>; + ) -> ServiceResult<()> + where + Self: Sized; /// Load cached analysis results. /// @@ -79,7 +81,9 @@ pub trait StorageService: Send + Sync { &self, key: &AnalysisKey, context: &AnalysisContext, - ) -> ServiceResult>>; + ) -> ServiceResult>> + where + Self: Sized; /// Store parsed document for caching. /// @@ -89,7 +93,9 @@ pub trait StorageService: Send + Sync { &self, document: &ParsedDocument, context: &AnalysisContext, - ) -> ServiceResult; + ) -> ServiceResult + where + Self: Sized; /// Load cached parsed document. /// @@ -99,7 +105,9 @@ pub trait StorageService: Send + Sync { &self, key: &StorageKey, context: &AnalysisContext, - ) -> ServiceResult>>; + ) -> ServiceResult>> + where + Self: Sized; /// Store cross-file relationships. 
/// @@ -209,7 +217,7 @@ pub struct AnalysisKey { pub struct AnalysisResult { pub documents: Vec>, pub relationships: Vec, - pub metadata: HashMap, + pub metadata: RapidMap, pub timestamp: SystemTime, pub version: String, } @@ -284,7 +292,7 @@ pub struct MaintenanceResult { pub operation: MaintenanceOperation, pub success: bool, pub message: String, - pub metrics: HashMap, + pub metrics: RapidMap, pub duration: Duration, } @@ -408,9 +416,9 @@ pub struct AnalyticsSummary { #[derive(Debug, Clone)] pub struct PerformanceMetrics { pub period: TimePeriod, - pub throughput: f64, // operations per second - pub latency_percentiles: HashMap, // p50, p95, p99 - pub error_rates: HashMap, + pub throughput: f64, // operations per second + pub latency_percentiles: RapidMap, // p50, p95, p99 + pub error_rates: RapidMap, pub resource_usage: ResourceUsage, } @@ -460,13 +468,16 @@ mod tests { fn test_analysis_key() { let key = AnalysisKey { operation_type: "pattern_match".to_string(), - content_hash: 12345, + content_fingerprint: recoco_utils::fingerprint::Fingerprint([0u8; 16]), configuration_hash: 67890, version: "1.0".to_string(), }; assert_eq!(key.operation_type, "pattern_match"); - assert_eq!(key.content_hash, 12345); + assert_eq!( + key.content_fingerprint, + recoco_utils::fingerprint::Fingerprint([0u8; 16]) + ); } #[test] diff --git a/crates/services/src/types.rs b/crates/services/src/types.rs index 037f01c..b5752ff 100644 --- a/crates/services/src/types.rs +++ b/crates/services/src/types.rs @@ -23,8 +23,8 @@ //! 
- [`AnalysisContext`] - Carries execution and analysis context across service boundaries use std::any::Any; -use std::collections::HashMap; use std::path::PathBuf; +use thread_utils::RapidMap; // Conditionally import thread dependencies when available #[cfg(feature = "ast-grep-backend")] @@ -322,13 +322,13 @@ impl<'tree, D: Doc> CodeMatch<'tree, D> { #[derive(Debug, Default, Clone)] pub struct DocumentMetadata { /// Symbols defined in this document (functions, classes, variables) - pub defined_symbols: HashMap, + pub defined_symbols: RapidMap, /// Symbols imported from other files - pub imported_symbols: HashMap, + pub imported_symbols: RapidMap, /// Symbols exported by this file - pub exported_symbols: HashMap, + pub exported_symbols: RapidMap, /// Function calls made in this document pub function_calls: Vec, @@ -337,7 +337,7 @@ pub struct DocumentMetadata { pub type_info: Vec, /// Language-specific metadata - pub language_metadata: HashMap, + pub language_metadata: RapidMap, } /// Information about a symbol definition @@ -394,7 +394,7 @@ pub struct CrossFileRelationship { pub target_file: PathBuf, pub source_symbol: String, pub target_symbol: String, - pub relationship_data: HashMap, + pub relationship_data: RapidMap, } /// Context for pattern matches @@ -402,7 +402,7 @@ pub struct CrossFileRelationship { pub struct MatchContext { pub execution_scope: ExecutionScope, pub analysis_depth: AnalysisDepth, - pub context_data: HashMap, + pub context_data: RapidMap, } /// Execution scope for analysis operations @@ -458,7 +458,7 @@ pub struct AnalysisContext { pub execution_config: ExecutionConfig, /// Custom context data - pub context_data: HashMap, + pub context_data: RapidMap, } impl Default for AnalysisContext { @@ -471,7 +471,7 @@ impl Default for AnalysisContext { exclude_patterns: vec!["**/node_modules/**".to_string(), "**/target/**".to_string()], max_files: None, execution_config: ExecutionConfig::default(), - context_data: HashMap::new(), + context_data: 
thread_utils::get_map(), } } } diff --git a/crates/thread/Cargo.toml b/crates/thread/Cargo.toml new file mode 100644 index 0000000..937fb2c --- /dev/null +++ b/crates/thread/Cargo.toml @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: 2026 Knitli Inc. +# SPDX-FileContributor: Adam Poulemanos +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +[package] +name = "thread" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +description = "A safe, fast, flexible code analysis and parsing library built in Rust. High-level entry point for the Thread ecosystem." +documentation.workspace = true +readme.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +thread-ast-engine = { workspace = true, optional = true } +thread-flow = { workspace = true, optional = true } +thread-language = { workspace = true, optional = true } +thread-rule-engine = { workspace = true, optional = true } +thread-services = { workspace = true, optional = true } +thread-utils = { workspace = true, optional = true } + +[features] +default = ["ast", "language", "rule", "services"] +# Core AST engine +ast = [ + "dep:thread-ast-engine", + "thread-ast-engine/matching", + "thread-ast-engine/parsing" +] +# Dataflow orchestration and caching (requires heavy dependencies) +flow = [ + "dep:thread-flow", + "dep:thread-services", + "thread-flow/parallel", + "thread-flow/postgres-backend", + "thread-services/serialization" +] +# Full feature set +full = [ + "ast", + "flow", + "language", + "rule", + "services", + "thread-language/html-embedded", + "thread-services/tower-services", + "utils" +] +# Language support and tree-sitter parsers +language = ["dep:thread-language", "thread-language/all-parsers"] +# Rule engine for scanning and transformation +rule = ["dep:thread-rule-engine"] +# 
High-level service layer and abstractions +services = [ + "dep:thread-services", + "thread-services/ast-grep-backend", + "thread-services/performance", + "thread-services/serialization" +] +# Shared utilities +utils = ["dep:thread-utils"] +# Special feature for WASM/Edge deployment +worker = [ + "ast", + "language", + "rule", + "services", + "thread-flow/worker", + "thread-rule-engine/worker", + "thread-services/ast-grep-backend" +] diff --git a/crates/thread/src/lib.rs b/crates/thread/src/lib.rs new file mode 100644 index 0000000..9d1dae7 --- /dev/null +++ b/crates/thread/src/lib.rs @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: 2026 Knitli Inc. +// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: AGPL-3.0-or-later + +//! # Thread - Safe, Fast, Flexible Code Analysis and Parsing +//! +//! **Thread** is a high-performance ecosystem for code analysis and transformation built in Rust. +//! It combines the power of tree-sitter for robust parsing with a high-level rule engine +//! and content-addressed caching for efficient codebase-wide analysis. +//! +//! This crate serves as the primary entry point for the Thread ecosystem, re-exporting +//! core components from specialized sub-crates. +//! +//! ## Core Architecture +//! +//! Thread is built on a modular "service-library dual architecture": +//! +//! 1. **Library Ecosystem** - Reusable components for AST pattern matching and transformation. +//! 2. **Service Platform** - Persistent analysis with incremental intelligence and caching. +//! +//! ## Key Modules +//! +//! - [`ast`] - Core AST parsing, matching, and transformation engine. +//! - [`language`] - Support for various programming languages via tree-sitter. +//! - [`rule`] - Rule-based scanning and transformation system. +//! - [`services`] - High-level service interfaces and abstractions. +//! - [`flow`] - Dataflow orchestration and incremental analysis (optional). +//! - [`utils`] - Common utilities and performance optimizations. +//! +//! 
## Quick Start +//! +//! ```rust,no_run +//! use thread::language::{Tsx, LanguageExt}; +//! +//! // Parse code and find patterns +//! let ast = Tsx.ast_grep("function hello() { console.log('world'); }"); +//! let root = ast.root(); +//! let matches = root.find_all("console.log($$$ARGS)"); +//! +//! for m in matches { +//! println!("Found console.log with {} arguments", m.get_env().get_multiple_matches("ARGS").len()); +//! } +//! ``` + +/// Core AST engine for parsing, matching, and transformation. +#[cfg(feature = "ast")] +pub mod ast { + pub use thread_ast_engine::*; +} + +/// Language definitions and tree-sitter parser integrations. +#[cfg(feature = "language")] +pub mod language { + #[cfg(feature = "ast")] + pub use thread_ast_engine::tree_sitter::LanguageExt; + pub use thread_language::*; +} + +/// Rule-based scanning and transformation system. +#[cfg(feature = "rule")] +pub mod rule { + pub use thread_rule_engine::*; +} + +/// Dataflow orchestration layer for incremental computation and caching. +#[cfg(any(feature = "flow", feature = "worker"))] +pub mod flow { + pub use thread_flow::*; +} + +/// High-level service interfaces and application abstractions. +#[cfg(feature = "services")] +pub mod services { + pub use thread_services::*; +} + +/// Shared utilities and performance-critical primitives. +#[cfg(feature = "utils")] +pub mod utils { + pub use thread_utils::*; +} + +// Re-export common types at the top level for better ergonomics +#[cfg(feature = "ast")] +pub use thread_ast_engine::{AstGrep, Language, Node, Root}; + +#[cfg(feature = "language")] +pub use thread_language::SupportLang; + +#[cfg(feature = "services")] +pub use thread_services::{CodeAnalyzer, CodeParser, ParsedDocument, ServiceError, ServiceResult}; diff --git a/crates/thread/tests/integration.rs b/crates/thread/tests/integration.rs new file mode 100644 index 0000000..6648b57 --- /dev/null +++ b/crates/thread/tests/integration.rs @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: 2026 Knitli Inc. 
+// SPDX-FileContributor: Adam Poulemanos +// SPDX-License-Identifier: AGPL-3.0-or-later + +#[cfg(all(feature = "ast", feature = "language"))] +#[test] +fn test_reexports_work() { + use thread::language::{LanguageExt, Tsx}; + + let ast = Tsx.ast_grep("const x = 1;"); + let matches: Vec<_> = ast.root().find_all("const $VAR = $VALUE").collect(); + assert_eq!(matches.len(), 1); +} + +#[cfg(all(feature = "ast", feature = "services"))] +#[test] +fn test_service_reexports_work() { + use thread::services::FileSystemContext; + + // Just check if we can use the types + let _ctx = FileSystemContext::new("."); +} diff --git a/crates/utils/src/hash_help.rs b/crates/utils/src/hash_help.rs index 3c653a6..c6fd952 100644 --- a/crates/utils/src/hash_help.rs +++ b/crates/utils/src/hash_help.rs @@ -67,19 +67,20 @@ pub fn hash_file(file: &mut std::fs::File) -> Result { /// Computes a hash for a [`std::fs::File`] object using `rapidhash` with a specified seed. pub fn hash_file_with_seed(file: &mut std::fs::File, seed: u64) -> Result { let secrets = rapidhash::v3::RapidSecrets::seed(seed); - rapidhash::v3::rapidhash_v3_file_seeded(file, &secrets) - .map_err(std::io::Error::other) + rapidhash::v3::rapidhash_v3_file_seeded(file, &secrets).map_err(std::io::Error::other) } /// Computes a hash for a byte slice using `rapidhash`. #[inline(always)] -#[must_use] pub const fn hash_bytes(bytes: &[u8]) -> u64 { +#[must_use] +pub const fn hash_bytes(bytes: &[u8]) -> u64 { rapidhash::v3::rapidhash_v3(bytes) } /// Computes a hash for a byte slice using `rapidhash` with a specified seed. 
#[inline(always)] -#[must_use] pub const fn hash_bytes_with_seed(bytes: &[u8], seed: u64) -> u64 { +#[must_use] +pub const fn hash_bytes_with_seed(bytes: &[u8], seed: u64) -> u64 { // Note: RapidSecrets::seed is const, so this should be fine in a const fn let secrets = rapidhash::v3::RapidSecrets::seed(seed); rapidhash::v3::rapidhash_v3_seeded(bytes, &secrets) @@ -117,7 +118,7 @@ mod tests { let hash1 = hash_bytes(b"hello"); let hash2 = hash_bytes(b"world"); let hash3 = hash_bytes(b"hello world"); - + // Different inputs should produce different hashes assert_ne!(hash1, hash2); assert_ne!(hash1, hash3); @@ -129,7 +130,7 @@ mod tests { let data = b"The quick brown fox jumps over the lazy dog"; let hash1 = hash_bytes(data); let hash2 = hash_bytes(data); - + assert_eq!(hash1, hash2, "Hash should be deterministic"); } @@ -139,7 +140,7 @@ mod tests { let hash1 = hash_bytes(b"test"); let hash2 = hash_bytes(b"Test"); // Single bit change let hash3 = hash_bytes(b"test1"); // Additional character - + assert_ne!(hash1, hash2); assert_ne!(hash1, hash3); assert_ne!(hash2, hash3); @@ -150,22 +151,24 @@ mod tests { // Test with larger input let large_data = vec![0u8; 10000]; let hash1 = hash_bytes(&large_data); - + // Should be deterministic even for large inputs assert_eq!(hash1, hash_bytes(&large_data)); - + // Slightly different large input let mut large_data2 = large_data.clone(); large_data2[5000] = 1; let hash2 = hash_bytes(&large_data2); - + assert_ne!(hash1, hash2); } #[test] fn test_hash_bytes_various_sizes() { // Test various input sizes to exercise different code paths - for size in [0, 1, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 255, 256, 1023, 1024] { + for size in [ + 0, 1, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128, 255, 256, 1023, 1024, + ] { let data = vec![0u8; size]; let hash = hash_bytes(&data); // Should be deterministic @@ -178,21 +181,21 @@ mod tests { fn test_hash_bytes_with_seed_deterministic() { let data = b"test data"; let seed = 12345u64; - + let hash1 = 
hash_bytes_with_seed(data, seed); let hash2 = hash_bytes_with_seed(data, seed); - + assert_eq!(hash1, hash2, "Hash with seed should be deterministic"); } #[test] fn test_hash_bytes_with_seed_different_seeds() { let data = b"test data"; - + let hash1 = hash_bytes_with_seed(data, 1); let hash2 = hash_bytes_with_seed(data, 2); let hash3 = hash_bytes_with_seed(data, 3); - + // Different seeds should produce different hashes assert_ne!(hash1, hash2); assert_ne!(hash1, hash3); @@ -204,7 +207,7 @@ mod tests { let seed = 42u64; let hash1 = hash_bytes_with_seed(&[], seed); let hash2 = hash_bytes_with_seed(&[], seed); - + assert_eq!(hash1, hash2); } @@ -213,12 +216,12 @@ mod tests { // Test that different seeds produce well-distributed hashes let data = b"test"; let mut hashes = HashSet::new(); - + for seed in 0..100 { let hash = hash_bytes_with_seed(data, seed); hashes.insert(hash); } - + // Should have high uniqueness (allowing for small collision chance) assert!( hashes.len() >= HASH_DISTRIBUTION_MIN_UNIQUENESS, @@ -232,14 +235,14 @@ mod tests { fn test_hash_file_empty() -> Result<(), std::io::Error> { let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.flush()?; - + let mut file = temp_file.reopen()?; let hash1 = hash_file(&mut file)?; - + // Reopen and hash again let mut file = temp_file.reopen()?; let hash2 = hash_file(&mut file)?; - + assert_eq!(hash1, hash2, "Empty file hash should be deterministic"); Ok(()) } @@ -249,14 +252,14 @@ mod tests { let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.write_all(b"hello world")?; temp_file.flush()?; - + let mut file = temp_file.reopen()?; let hash1 = hash_file(&mut file)?; - + // Reopen and hash again let mut file = temp_file.reopen()?; let hash2 = hash_file(&mut file)?; - + assert_eq!(hash1, hash2, "File hash should be deterministic"); Ok(()) } @@ -266,18 +269,21 @@ mod tests { let mut temp_file1 = tempfile::NamedTempFile::new()?; temp_file1.write_all(b"hello")?; temp_file1.flush()?; - + let mut 
temp_file2 = tempfile::NamedTempFile::new()?; temp_file2.write_all(b"world")?; temp_file2.flush()?; - + let mut file1 = temp_file1.reopen()?; let hash1 = hash_file(&mut file1)?; - + let mut file2 = temp_file2.reopen()?; let hash2 = hash_file(&mut file2)?; - - assert_ne!(hash1, hash2, "Different file contents should produce different hashes"); + + assert_ne!( + hash1, hash2, + "Different file contents should produce different hashes" + ); Ok(()) } @@ -287,14 +293,14 @@ mod tests { let large_data = vec![0xABu8; LARGE_FILE_SIZE]; temp_file.write_all(&large_data)?; temp_file.flush()?; - + let mut file = temp_file.reopen()?; let hash1 = hash_file(&mut file)?; - + // Reopen and hash again let mut file = temp_file.reopen()?; let hash2 = hash_file(&mut file)?; - + assert_eq!(hash1, hash2, "Large file hash should be deterministic"); Ok(()) } @@ -302,17 +308,20 @@ mod tests { #[test] fn test_hash_file_vs_hash_bytes_consistency() -> Result<(), std::io::Error> { let data = b"test data for consistency check"; - + let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.write_all(data)?; temp_file.flush()?; - + let mut file = temp_file.reopen()?; let file_hash = hash_file(&mut file)?; - + let bytes_hash = hash_bytes(data); - - assert_eq!(file_hash, bytes_hash, "File hash should match byte hash for same content"); + + assert_eq!( + file_hash, bytes_hash, + "File hash should match byte hash for same content" + ); Ok(()) } @@ -322,15 +331,15 @@ mod tests { let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.write_all(b"test data")?; temp_file.flush()?; - + let seed = 12345u64; - + let mut file1 = temp_file.reopen()?; let hash1 = hash_file_with_seed(&mut file1, seed)?; - + let mut file2 = temp_file.reopen()?; let hash2 = hash_file_with_seed(&mut file2, seed)?; - + assert_eq!(hash1, hash2, "File hash with seed should be deterministic"); Ok(()) } @@ -340,16 +349,16 @@ mod tests { let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.write_all(b"test data")?; 
temp_file.flush()?; - + let mut file1 = temp_file.reopen()?; let hash1 = hash_file_with_seed(&mut file1, 1)?; - + let mut file2 = temp_file.reopen()?; let hash2 = hash_file_with_seed(&mut file2, 2)?; - + let mut file3 = temp_file.reopen()?; let hash3 = hash_file_with_seed(&mut file3, 3)?; - + assert_ne!(hash1, hash2); assert_ne!(hash1, hash3); assert_ne!(hash2, hash3); @@ -360,17 +369,20 @@ mod tests { fn test_hash_file_with_seed_vs_hash_bytes_consistency() -> Result<(), std::io::Error> { let data = b"test data for seeded consistency"; let seed = 42u64; - + let mut temp_file = tempfile::NamedTempFile::new()?; temp_file.write_all(data)?; temp_file.flush()?; - + let mut file = temp_file.reopen()?; let file_hash = hash_file_with_seed(&mut file, seed)?; - + let bytes_hash = hash_bytes_with_seed(data, seed); - - assert_eq!(file_hash, bytes_hash, "Seeded file hash should match seeded byte hash"); + + assert_eq!( + file_hash, bytes_hash, + "Seeded file hash should match seeded byte hash" + ); Ok(()) } @@ -404,11 +416,11 @@ mod tests { #[test] fn test_rapid_map_basic_operations() { let mut map: RapidMap = get_map(); - + map.insert("one".to_string(), 1); map.insert("two".to_string(), 2); map.insert("three".to_string(), 3); - + assert_eq!(map.len(), 3); assert_eq!(map.get("one"), Some(&1)); assert_eq!(map.get("two"), Some(&2)); @@ -419,11 +431,11 @@ mod tests { #[test] fn test_rapid_set_basic_operations() { let mut set: RapidSet = get_set(); - + set.insert("apple".to_string()); set.insert("banana".to_string()); set.insert("cherry".to_string()); - + assert_eq!(set.len(), 3); assert!(set.contains("apple")); assert!(set.contains("banana")); @@ -434,11 +446,11 @@ mod tests { #[test] fn test_rapid_map_with_capacity_usage() { let mut map: RapidMap = map_with_capacity(10); - + for i in 0..5 { map.insert(i, format!("value_{}", i)); } - + assert_eq!(map.len(), 5); assert!(map.capacity() >= 10); } @@ -446,11 +458,11 @@ mod tests { #[test] fn test_rapid_set_with_capacity_usage() { let 
mut set: RapidSet = set_with_capacity(10); - + for i in 0..5 { set.insert(i); } - + assert_eq!(set.len(), 5); assert!(set.capacity() >= 10); } @@ -459,13 +471,13 @@ mod tests { fn test_rapid_map_hash_distribution() { // Test that RapidMap handles hash collisions properly let mut map: RapidMap = get_map(); - + for i in 0..HASH_DISTRIBUTION_TEST_SIZE { map.insert(i as i32, format!("value_{}", i)); } - + assert_eq!(map.len(), HASH_DISTRIBUTION_TEST_SIZE); - + // Verify all values are retrievable for i in 0..HASH_DISTRIBUTION_TEST_SIZE { assert_eq!(map.get(&(i as i32)), Some(&format!("value_{}", i))); @@ -476,13 +488,13 @@ mod tests { fn test_rapid_set_hash_distribution() { // Test that RapidSet handles hash collisions properly let mut set: RapidSet = get_set(); - + for i in 0..HASH_DISTRIBUTION_TEST_SIZE { set.insert(i as i32); } - + assert_eq!(set.len(), HASH_DISTRIBUTION_TEST_SIZE); - + // Verify all values are present for i in 0..HASH_DISTRIBUTION_TEST_SIZE { assert!(set.contains(&(i as i32))); diff --git a/crates/wasm/Cargo.toml b/crates/wasm/Cargo.toml index 1dea97d..7caf6c7 100644 --- a/crates/wasm/Cargo.toml +++ b/crates/wasm/Cargo.toml @@ -60,7 +60,7 @@ thread-language = { workspace = true } thread-utils = { workspace = true, default-features = false, features = [ "hashers", ] } -wasm-bindgen = { version = "0.2.100" } +wasm-bindgen = { version = "0.2.100", optional = true } web-sys = { version = "0.3.77", features = ["console"], optional = true } [dev-dependencies] @@ -70,7 +70,7 @@ wasm-bindgen-test = "0.3.50" default = ["worker"] browser = ["multi-threading", "serialization"] multi-threading = ["rayon"] -panic_hook = ["console_error_panic_hook", "web-sys/console"] +panic-hook = ["console_error_panic_hook", "web-sys/console"] # TODO: wasi = [ ] serialization = ["serde/derive"] worker = ["web-sys/console"] # single-threaded worker deployment diff --git a/crates/wasm/src/utils.rs b/crates/wasm/src/utils.rs index 0d15ef3..f5890be 100644 --- 
a/crates/wasm/src/utils.rs +++ b/crates/wasm/src/utils.rs @@ -3,8 +3,9 @@ // // SPDX-License-Identifier: AGPL-3.0-or-later +#[allow(unused_imports)] #[cfg(feature = "console_error_panic_hook")] -use wasm_bindgen::*; +use wasm_bindgen::prelude::wasm_bindgen; #[cfg(feature = "console_error_panic_hook")] #[wasm_bindgen(start)] pub fn set_panic_hook() { diff --git a/deny.toml b/deny.toml index 143e4a1..fbf9801 100644 --- a/deny.toml +++ b/deny.toml @@ -124,10 +124,12 @@ confidence-threshold = 0.8 # aren't accepted for every possible crate as with the normal allow list exceptions = [ { allow = ["AGPL-3.0-or-later"], crate = "xtask" }, - { allow = ["AGPL-3.0-or-later", "AGPL-3.0-or-later AND MIT"], crate = "thread-ast-engine" }, - { allow = ["AGPL-3.0-or-later", "AGPL-3.0-or-later AND MIT"], crate = "thread-rule-engine" }, + { allow = ["AGPL-3.0-or-later"], crate = "thread-ast-engine" }, + { allow = ["AGPL-3.0-or-later"], crate = "thread-rule-engine" }, { allow = ["AGPL-3.0-or-later"], crate = "thread-utils" }, - { allow = ["AGPL-3.0-or-later", "AGPL-3.0-or-later AND MIT"], crate = "thread-language" }, + { allow = ["AGPL-3.0-or-later"], crate = "thread-language" }, + { allow = ["AGPL-3.0-or-later"], crate = "thread-flow" }, + { allow = ["AGPL-3.0-or-later"], crate = "thread" }, { allow = ["AGPL-3.0-or-later"], crate = "thread-services" }, { allow = ["AGPL-3.0-or-later"], crate = "thread-wasm" }, @@ -226,8 +228,26 @@ deny = [ # exact = true # Certain crates/versions that will be skipped when doing duplicate detection. skip = [ - # "ansiterm@0.11.0", - # { crate = "ansiterm@0.11.0", reason = "you can specify a reason why it can't be updated/removed" }, + # Transitive version conflicts from external dependencies we don't control. + # All older versions below are pinned by indirect deps; the newest version is the one we want. 
+ { crate = "core-foundation@0.9.4", reason = "Pinned by system-configuration via hyper-util" }, + { crate = "foldhash@0.1.5", reason = "Older version pulled by transitive dep" }, + { crate = "getrandom@0.2.17", reason = "Pinned by rand 0.8 ecosystem (testcontainers)" }, + { crate = "getrandom@0.3.4", reason = "Mid-version pulled by transitive dep" }, + { crate = "hashbrown@0.15.5", reason = "Older version pulled by transitive dep" }, + { crate = "hashlink@0.10.0", reason = "Older version pulled by sqlx" }, + { crate = "phf@0.12.1", reason = "Older version pulled by tree-sitter-php" }, + { crate = "phf_shared@0.12.1", reason = "Older version pulled by tree-sitter-php" }, + { crate = "rand@0.8.5", reason = "Pinned by testcontainers and other external deps" }, + { crate = "rand_chacha@0.3.1", reason = "Part of rand 0.8 ecosystem" }, + { crate = "rand_core@0.6.4", reason = "Part of rand 0.8 ecosystem" }, + { crate = "webpki-roots@0.26.11", reason = "Older version pulled by hyper-rustls" }, + { crate = "whoami@1.6.1", reason = "Older version pulled by transitive dep" }, + { crate = "windows-sys@0.48.0", reason = "Legacy version pulled by older transitive deps" }, + { crate = "windows-sys@0.60.2", reason = "Mid-version pulled by transitive dep" }, + { crate = "windows-targets@0.48.5", reason = "Part of windows-sys 0.48 ecosystem" }, + { crate = "windows_x86_64_gnu@0.48.5", reason = "Part of windows-sys 0.48 ecosystem" }, + { crate = "windows_x86_64_msvc@0.48.5", reason = "Part of windows-sys 0.48 ecosystem" }, ] # Similarly to `skip` allows you to skip certain crates during duplicate # detection. 
Unlike skip, it also includes the entire tree of transitive @@ -256,7 +276,8 @@ allow-git = [] [sources.allow-org] # github.com organizations to allow git sources for -github = ["knitli"] +# Re-add "knitli" here when Thread publishes git-sourced dependencies from that org +github = [] # gitlab.com organizations to allow git sources for gitlab = [] # bitbucket.org organizations to allow git sources for diff --git a/docs/deployment/EDGE_DEPLOYMENT.md b/docs/deployment/EDGE_DEPLOYMENT.md index 1742d02..55a7f14 100644 --- a/docs/deployment/EDGE_DEPLOYMENT.md +++ b/docs/deployment/EDGE_DEPLOYMENT.md @@ -1,3 +1,9 @@ + + # Thread Flow Edge Deployment Guide Comprehensive guide for deploying Thread Flow to Cloudflare Workers with D1 distributed database backend. diff --git a/hk.pkl b/hk.pkl index 112f938..d7a6c04 100644 --- a/hk.pkl +++ b/hk.pkl @@ -11,14 +11,22 @@ local linters = new Mapping { check = "cargo deny --all-features --manifest-path {{ workspace_indicator }} -f json -L warn check --audit-compatible-output --exclude-dev --hide-inclusion-graph | jq -e '.[].vulnerabilities | length == 0' || exit 1" } - ["cargo_fmt"] = Builtins.cargo_fmt + ["cargo_fmt"] = (Builtins.cargo_fmt) { + check = + "cargo fmt --all --manifest-path {{workspace_indicator}} -- --check --config-path ./rustfmt.toml" + check_list_files = + "cargo fmt --all --manifest-path {{workspace_indicator}} -- --check --files-with-diff --config-path ./rustfmt.toml" + fix = "cargo fmt --all --manifest-path {{workspace_indicator}} -- --config-path ./rustfmt.toml" + } ["cargo_clippy"] = Builtins.cargo_clippy ["cargo_check"] = Builtins.cargo_check ["cargo_test"] = new Step { workspace_indicator = "Cargo.toml" glob = "src/**/*.rs" + // thread-language's napi-* features conflict with tree-sitter-parsing at runtime; + // run it separately with only the compatible feature set. 
check = - "cargo nextest --manifest-path {{ workspace_indicator }} run --all-features --no-fail-fast -j 1" + "cargo nextest --manifest-path {{ workspace_indicator }} run --workspace --exclude thread-language --all-features --no-fail-fast -j 1 && cargo nextest --manifest-path {{ workspace_indicator }} run -p thread-language --features all-parsers,matching --no-fail-fast -j 1" env = new Mapping { ["RUST_BACKTRACE"] = "1" } @@ -100,7 +108,7 @@ local linters = new Mapping { local ci = (linters) { ["cargo_test"] { check = - "cargo nextest --manifest-path {{ workspace_indicator }} run --all-features --fail-fast -j 1" + "cargo nextest --manifest-path {{ workspace_indicator }} run --workspace --exclude thread-language --all-features --fail-fast -j 1 && cargo nextest --manifest-path {{ workspace_indicator }} run -p thread-language --features all-parsers,matching --fail-fast -j 1" } } diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..ed1d5f9 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: 2026 Knitli Inc. +# +# SPDX-License-Identifier: MIT OR Apache-2.0 +#:tombi schema.strict = false +# rustfmt configuration + +# Skip files that are intentionally broken or should not be formatted +ignore = [ + "crates/flow/tests/test_data/syntax_error.rs", +] +# Use 2024 edition by default +edition = "2024" +style_edition = "2024" diff --git a/sbom.spdx b/sbom.spdx deleted file mode 100644 index 58a9804..0000000 --- a/sbom.spdx +++ /dev/null @@ -1,1305 +0,0 @@ -SPDXVersion: SPDX-2.1 -DataLicense: CC0-1.0 -SPDXID: SPDXRef-DOCUMENT -DocumentName: thread -DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-57b0f83e-8b15-4b0f-966d-445319e3f8c0 -Creator: Person: Adam Poulemanos () -Creator: Organization: Knitli Inc. () -Creator: Tool: reuse-5.0.2 -Created: 2025-07-19T20:22:05Z -CreatorComment: This document was created automatically using available reuse information consistent with REUSE. 
-Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-07e72f6ebe551d6b95993c9c9a521efd -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c54a7e1da625b12bea9d5d2e49d246ca -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-be45515061eb2b0ca15134f996f70d05 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f97dbf2efbcd421cf45c55fdb45ac4c6 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cc7a2a5256f01f6b53ffbf2c91d8816d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c8f20abbd94580a0c18d89b6dd006e95 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2a025576974c7d06cd76b3e6f9a03eb2 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-bb3756cee51bf678543b049afb283a49 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-30a1d7c15b8accb80dd84e03cfbac958 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b40d0890d8726673b522cad1983fa61d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-470aec490d4744118f0e42c8e2440f45 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b52bef1803120595f4b2a7829dcbb9ff -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-78c5a17a25c0fbe9a485604f75a75a88 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7c64483b50d50057e9bf774832a41335 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-bfa8b0f656a6947307a8a9ee5571b8d4 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3b4809ddb478fee87ed9acc56cd80263 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c16667d35b7d7d38b7560050130cd6aa -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7383628c997a024f664b4622800b3266 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b8aca7d6890642118a77e49b86db1b02 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2a39efeba1882ae4dea0a92c6ca89dc7 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a1e1f4aabd2e746a94db5b19ceab994b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5824608efe5377128ba30a46450c059b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-da4f36c220bf6db4007063fdd168f1f7 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-10d5611a1e0c8e3e02bd3d4b6324160e 
-Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-0617cdfaf5e8a6a2fbc107f258595637 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-511280701d0ea20f85dc8a0164648145 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-ec6b164aa3b721341d579b6978719839 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-6f3e55dd5b25c4af7dbd70921c9abd52 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9e78a25e26d11febe57b342f5650eb32 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b2f41e5acab68a9ce200ea2ee88723d8 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-143d447ff55195e81166cd9f44ba8244 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9fc7f2e7b5aada08f2b3557fed51a3ff -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-991c6b69c972b880a69ad024f0c07311 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cc42a83a476dead4a95ad10c60c332a8 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-01713a9784b65de30ec6e463ae5cb2a8 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b1621c440e11b332e6a1c7cea66d04d1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-82236b51eee80388f254120dde3e41b3 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d21244bb838c7d9a2e5de8619a6377fd -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-28b06523c48f3ba2a481874bcf9f78e9 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c375f56ad1017c32d8cd14b6bb931155 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-26feb5cd4a18c478407544ca536efbe6 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d6903737be443edab488390cde72ef44 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8a5a7f1116a67058386473b2917e3764 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-01e2dccd15ff532c7ffc6fde1f9d610a -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d44b27a20b1566215674619b967ea82f -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-378498ea06c3736f1e80991eb40e05f7 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-e556605ef942814f1aa6257de95a6f55 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-09f48fed9cb0332237f44fa8dfa5ebe0 
-Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-ec9301d771546992ab95448a1b9f6c4e -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-6f08941627643ff7977c7a9a5310735c -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-1dbadf0d5f299a87e67e13e50912ea46 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-6a7d9b1bf74e92a66496575c971496c9 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-93cc0cca6ac0a415a8e94ed725ee0ac6 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9adcbf86a0c18c69d64450486315c9c3 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8ce21b9450fb9727e9e910389d6eccb0 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-fe05fb9fad746d3907eea8f5ebf0e52e -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-84dd26a32e71e8598737af625401f1c9 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4755e858a3ddf7999bf59f40c48821c6 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-87aabc808822ef9f16e6eb7ec57c4225 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-62bef916fc2e3aca8bc6f2582e01a3cb -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9242e18eec18fbbe46872b994d521352 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-1f3ad30e3477c1e63ce14c2b0491f134 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4679e2fb2736dfbe261dbc7014925aaa -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9f316d46be3893ad33252a9f85f0cd69 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-1a0b0ab05a8a32eb52ae983d7993792b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-03dbb7df859510a45820e8afcb4db8b8 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cc74a7e5dcaa4e3e6e59af5747b774c8 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5f68a60d241551b9478e8da4e1947f32 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5cf6bde490d11af95e99fe591950a539 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4e5549d97f78322abe3bf02fa034442f -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-88ddbbb712f12d1ad192aee361c9f00b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c2ece0178066fe3eb3590b9967240cc9 
-Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4f4f1b13f17307595fe3fe7d84552320 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c63352adae2e4c79a3b366b724c62636 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-44e3265b570a8ea06c6de3c1c19a88e8 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f0bcf2e352906ea7e459f2904978e7e1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5d82d5b7b5fcb8e574ff8f9e9f6cd950 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-72421e01ae5225be857aec8880eff7ff -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4344b19edab200ad510e59f6f65d9e67 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-6b093b0c4568d88fbe435cb2d5f8a6cf -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b4984edc7c211bd6548a399e64225b7c -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-69b1eaf75e7587a9ff31c790e499773f -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8c69dae73521d0cae6103c91eca41537 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-78ba32a9a61842d00cd60c7aa7b53870 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d84cd3748a1af00f29c0574be5dbf285 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-84b279b1a28794e6812ca95ba3c3b32b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-755c900c2c113574ce12387662134517 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5b1d40119bd22e0bdda07ff21d0bfcb4 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-71503a120709919546f858fabb02fef5 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3b391b3b3ace0233c17487d0c8c59bc3 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4ff01a81cb40fa09167b85fcdb7d717c -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-aaeebd3424e75edc252e0fc0f9c40357 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-16c322de0974dc8ede200f764998850d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c8716e9c443b191bdda41f18123231bc -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c20e2a59852c3e934e7c5219f37f164d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-84c5799fc4644e42e9377609b1a0d8ba 
-Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d4e5aee67a46f52b20e9ddc3cbf7f8a1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-66c8e5b8b74181dab0efa93fedf04775 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3e20833918d83e1501367c81f699cd28 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-43b6916f4130a2f307648cbd8780c6ca -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2a1b8523c7ed302d1ae757565e9833ba -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-f8fc8dfa9cd986e616d1261fa6e3b60b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-78101a273943c2ee663817abf1cec511 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9fb9f55a41c065aac1ce6ce1c46a6548 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-46d9043e3c5e09e750583361293dc3e3 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-387aea8823d88cdf386814687489a8a9 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-67c0c3ec0d27bbefa006f4f6b4435aaa -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2ec964f49a3ef9ff1163a8f86f6abd52 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-8f82799968b369568ab71617b47acae1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-1186548580f204803d45c200407cf83e -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-fbc59c602fbeb91a3ad6eb814d67fcbe -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-ab578ef52433772de1a1ac40c24c5dd7 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7f63724cdb7306f17cb7ebb13b9696cf -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2f42843e8bad608efebd3fe792506733 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cf45e0b9fb205344b522fc26a8298235 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-5ee3f66b8956f2ce1eff477aa68edd88 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-d979566a50fe94181ba534401c2c62b1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-3c88c068e0fd6eb9b98ec09e04413a5b -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4e5f3eee8ceebc5edde78a3911cfdb49 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-402f57903a72a929777f9ecb50757632 
-Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7b2801debffe14ddad59a2b912c1b3d1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-c4c348a439df60cdf4aff38a25210bc1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a25112f597e8755c458099641b73386d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a52bee01e6a3136dfb1aa4c5801ca671 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-cd66635fb95e3bdc6e6138501afb03cc -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-ba1acab5d40bff5ac621372263b6d331 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b6ae9ad907495912115b7c8f9d53809e -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2c0d6c272508977525c03e1c21ab05dc -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-a2d59c457404f3e2c7adf8057a6e3767 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-9f2ead9ce46a115b43f8e59e3f8daf88 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-7d9220e1bfa8d6cd26e5486c4d0116d1 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2b3c6dc79aaa8ab187f18575424cec72 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-b0c7afe8a516b792025a21fac26f330d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-00ab9cf365c27b94b2da4a73ab9274ba -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-fd90e7c132390ec22cac057ad5f86804 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-2a14fe7d658a46cff7436cfe88998325 -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-4939545db8b1a8a0b923d19c81ab970d -Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-29201f21040fd5b280c7cd4c8a504dda - -FileName: ./.gitattributes -SPDXID: SPDXRef-07e72f6ebe551d6b95993c9c9a521efd -FileChecksum: SHA1: 6456f547856c9ccc390f83394754d1318932b448 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./.github/actionlint.yml -SPDXID: SPDXRef-c54a7e1da625b12bea9d5d2e49d246ca -FileChecksum: SHA1: 2be288649886e73b96f3e60f9f9f3b9dc04fde19 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.github/chatmodes/analyze.chatmode.md -SPDXID: SPDXRef-be45515061eb2b0ca15134f996f70d05 -FileChecksum: SHA1: fc16b6888e7fefccbdb7130836fd7170eb5fd0e7 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.github/dependabot.yml -SPDXID: SPDXRef-f97dbf2efbcd421cf45c55fdb45ac4c6 -FileChecksum: SHA1: 2169a4f3f33aca28f9128fdd26c0ceb9ae703cf9 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.github/dontusefornow.md -SPDXID: SPDXRef-cc7a2a5256f01f6b53ffbf2c91d8816d -FileChecksum: SHA1: 913bfb2bf1a0d1b58b67df69b68c70e915a1a9ca -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.github/workflows/ci.yml -SPDXID: SPDXRef-c8f20abbd94580a0c18d89b6dd006e95 -FileChecksum: SHA1: b790dad3a272abeede72eeb7668ceb6761de8afc -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.github/workflows/cla.yml -SPDXID: SPDXRef-2a025576974c7d06cd76b3e6f9a03eb2 -FileChecksum: SHA1: 49de827f0cbe38e4d48c753281faa497bf1a9a26 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./.gitignore -SPDXID: SPDXRef-bb3756cee51bf678543b049afb283a49 -FileChecksum: SHA1: 4075a806092f7757ec209c0e234028b2c91a6cc9 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.mcp.json -SPDXID: SPDXRef-30a1d7c15b8accb80dd84e03cfbac958 -FileChecksum: SHA1: cf461167ff1250468324e5cd024c03d7cbcf1c97 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.vscode/settings.json -SPDXID: SPDXRef-b40d0890d8726673b522cad1983fa61d -FileChecksum: SHA1: 9b31788fc27afb2274b52ca4f6cfa207f64774b9 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./.yamlfmt.yml -SPDXID: SPDXRef-470aec490d4744118f0e42c8e2440f45 -FileChecksum: SHA1: e12c7340cebc58a4a60beeac59e2c0aa610f6186 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./CLAUDE.md -SPDXID: SPDXRef-b52bef1803120595f4b2a7829dcbb9ff -FileChecksum: SHA1: 1f88065390da4f46a8361fa1ee17dec735b01d0e -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./CONTRIBUTORS_LICENSE_AGREEMENT.md -SPDXID: SPDXRef-78c5a17a25c0fbe9a485604f75a75a88 -FileChecksum: SHA1: 2dde379d01d323ebbf4fd94ace8f68f08f62471c -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./Cargo.lock -SPDXID: SPDXRef-7c64483b50d50057e9bf774832a41335 -FileChecksum: SHA1: f062c7049a6dfc29818c3007d8d4e47851839a0e -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./Cargo.toml -SPDXID: SPDXRef-bfa8b0f656a6947307a8a9ee5571b8d4 -FileChecksum: SHA1: ba77c3c6e4b8be699267b760e210530a76820234 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./PLAN.md -SPDXID: SPDXRef-3b4809ddb478fee87ed9acc56cd80263 -FileChecksum: SHA1: 444991826f65bc3a719dc43e8d7b0c219fa5b4ed -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./README.md -SPDXID: SPDXRef-c16667d35b7d7d38b7560050130cd6aa -FileChecksum: SHA1: cefb1614025e52e89680215c1ed5198f8d35d07f -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./VENDORED.md -SPDXID: SPDXRef-7383628c997a024f664b4622800b3266 -FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./_typos.toml -SPDXID: SPDXRef-b8aca7d6890642118a77e49b86db1b02 -FileChecksum: SHA1: 62cd32ccdbfbe23cb82baed05b12cf0fce04a4c0 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./_unused.toml -SPDXID: SPDXRef-2a39efeba1882ae4dea0a92c6ca89dc7 -FileChecksum: SHA1: 21ad928a9262fdb2b1dc3ba408aa702694c4833c -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/Cargo.toml -SPDXID: SPDXRef-a1e1f4aabd2e746a94db5b19ceab994b -FileChecksum: SHA1: 6fc3aad9796b65042b08f54b76dbbb2dfb46fb75 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/README.md -SPDXID: SPDXRef-5824608efe5377128ba30a46450c059b -FileChecksum: SHA1: 435d13dbfedf1660d5d2ec6dc842517e79247876 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/VENDORED.md -SPDXID: SPDXRef-da4f36c220bf6db4007063fdd168f1f7 -FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/benches/performance_improvements.rs -SPDXID: SPDXRef-10d5611a1e0c8e3e02bd3d4b6324160e -FileChecksum: SHA1: 61ae466bb6ecbc71d7d26cdd837a5fba2f744d1e -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/language.rs -SPDXID: SPDXRef-0617cdfaf5e8a6a2fbc107f258595637 -FileChecksum: SHA1: f4b2a189dd277fa0277fdca20e73cca36e3e51ed -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/ast-engine/src/lib.rs -SPDXID: SPDXRef-511280701d0ea20f85dc8a0164648145 -FileChecksum: SHA1: 7f17156e3c72e7cb869a03ec0f4df11683dc973a -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/match_tree/match_node.rs -SPDXID: SPDXRef-ec6b164aa3b721341d579b6978719839 -FileChecksum: SHA1: b94adcabaff30b55e024df24c86cdf0a72d0012e -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/match_tree/mod.rs -SPDXID: SPDXRef-6f3e55dd5b25c4af7dbd70921c9abd52 -FileChecksum: SHA1: 14c60649c88759a2cfc0d17c559f5f3b158fbd3d -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/match_tree/strictness.rs -SPDXID: SPDXRef-9e78a25e26d11febe57b342f5650eb32 -FileChecksum: SHA1: b722fc3717e4971bfcf5ed1702c179e86e5fdbfe -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/ast-engine/src/matcher.rs -SPDXID: SPDXRef-b2f41e5acab68a9ce200ea2ee88723d8 -FileChecksum: SHA1: cb61df836b9eb4df491995a37c519b9d9c414a8f -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/matchers/kind.rs -SPDXID: SPDXRef-143d447ff55195e81166cd9f44ba8244 -FileChecksum: SHA1: c86a66f7a3758c7460e4d2a891f942a1d06d385c -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/matchers/mod.rs -SPDXID: SPDXRef-9fc7f2e7b5aada08f2b3557fed51a3ff -FileChecksum: SHA1: 1e3a28399ef3ea31826ba5cf811a83e8cf8df067 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/matchers/node_match.rs -SPDXID: SPDXRef-991c6b69c972b880a69ad024f0c07311 -FileChecksum: SHA1: 0b1ed8fbdd0ba2809c6436572ccafed574a53f12 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/ast-engine/src/matchers/pattern.rs -SPDXID: SPDXRef-cc42a83a476dead4a95ad10c60c332a8 -FileChecksum: SHA1: 6bd4e09da650b62c4811231d8fbe1fb5710c9ca7 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/matchers/text.rs -SPDXID: SPDXRef-01713a9784b65de30ec6e463ae5cb2a8 -FileChecksum: SHA1: 3961bdef1fe8816e77be7fc99f020d014c6a4250 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/matchers/types.rs -SPDXID: SPDXRef-b1621c440e11b332e6a1c7cea66d04d1 -FileChecksum: SHA1: 37fe02f426016e4407a682d5b5136a39ef79fce3 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/meta_var.rs -SPDXID: SPDXRef-82236b51eee80388f254120dde3e41b3 -FileChecksum: SHA1: 61b1e099ca5826bba0ea5dec4453bc88dbd5e885 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/ast-engine/src/node.rs -SPDXID: SPDXRef-d21244bb838c7d9a2e5de8619a6377fd -FileChecksum: SHA1: aa0e9a9f6f52c69040b75ecc07508e2365f7e4ee -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/ops.rs -SPDXID: SPDXRef-28b06523c48f3ba2a481874bcf9f78e9 -FileChecksum: SHA1: 745443b40b1c0ee5482d263fecfb36c3e2ffa99b -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/pinned.rs -SPDXID: SPDXRef-c375f56ad1017c32d8cd14b6bb931155 -FileChecksum: SHA1: 8cab1668d6f88f5ff9af57030f6c9ffd258a24ec -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/replacer.rs -SPDXID: SPDXRef-26feb5cd4a18c478407544ca536efbe6 -FileChecksum: SHA1: 4a244a268c7b4f4128ae4607c62918c53e0ece9c -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/ast-engine/src/replacer/indent.rs -SPDXID: SPDXRef-d6903737be443edab488390cde72ef44 -FileChecksum: SHA1: 1feca68b1a3a43c7c72ba60b0f1295f87b6a0ee8 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/replacer/structural.rs -SPDXID: SPDXRef-8a5a7f1116a67058386473b2917e3764 -FileChecksum: SHA1: 2cde4c088be5f128f1833a0329349f115d2c8b6e -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/replacer/template.rs -SPDXID: SPDXRef-01e2dccd15ff532c7ffc6fde1f9d610a -FileChecksum: SHA1: ea927fc52f716c1b7440cc847a174d794ec8a923 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/source.rs -SPDXID: SPDXRef-d44b27a20b1566215674619b967ea82f -FileChecksum: SHA1: a646365ec895b187e18a08f8acb494651c1ca6b0 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/ast-engine/src/tree_sitter/mod.rs -SPDXID: SPDXRef-378498ea06c3736f1e80991eb40e05f7 -FileChecksum: SHA1: cb8a97a5e7f2a2c45ed4660599b8108ef7c6ea7a -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/ast-engine/src/tree_sitter/traversal.rs -SPDXID: SPDXRef-e556605ef942814f1aa6257de95a6f55 -FileChecksum: SHA1: 1712ce876b9f51e7f5047a477a3436caed3cacab -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/Cargo.toml -SPDXID: SPDXRef-09f48fed9cb0332237f44fa8dfa5ebe0 -FileChecksum: SHA1: 51afc75835902170180785253d188cc07551d5bb -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/README.md -SPDXID: SPDXRef-ec9301d771546992ab95448a1b9f6c4e -FileChecksum: SHA1: 435d13dbfedf1660d5d2ec6dc842517e79247876 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/VENDORED.md -SPDXID: SPDXRef-6f08941627643ff7977c7a9a5310735c -FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/language/benches/performance.rs -SPDXID: SPDXRef-1dbadf0d5f299a87e67e13e50912ea46 -FileChecksum: SHA1: e7ff347781b9b83648504b0f937865f18d7bd32d -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/bash.rs -SPDXID: SPDXRef-6a7d9b1bf74e92a66496575c971496c9 -FileChecksum: SHA1: 8072953096f1ca0dcae055fca596159f21bc8e76 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/cpp.rs -SPDXID: SPDXRef-93cc0cca6ac0a415a8e94ed725ee0ac6 -FileChecksum: SHA1: 10bd6ccfe574caa4dd739f734ebad4d6badb086d -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/csharp.rs -SPDXID: SPDXRef-9adcbf86a0c18c69d64450486315c9c3 -FileChecksum: SHA1: 32f02da0da4204b0089939c940ad8e711794ba59 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/language/src/css.rs -SPDXID: SPDXRef-8ce21b9450fb9727e9e910389d6eccb0 -FileChecksum: SHA1: 4c9ab64b4977931ea9995194cd2aa0a251a9c3fc -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/elixir.rs -SPDXID: SPDXRef-fe05fb9fad746d3907eea8f5ebf0e52e -FileChecksum: SHA1: 15a09881e448b5f2d56dc7c4d1bf558da0ef27a1 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/go.rs -SPDXID: SPDXRef-84dd26a32e71e8598737af625401f1c9 -FileChecksum: SHA1: 71ac17064b16d4db11a6983ee1910992ec186228 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/haskell.rs -SPDXID: SPDXRef-4755e858a3ddf7999bf59f40c48821c6 -FileChecksum: SHA1: ab8e3fdfba48f455fe452e85066b92bb3e9ffef8 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/language/src/html.rs -SPDXID: SPDXRef-87aabc808822ef9f16e6eb7ec57c4225 -FileChecksum: SHA1: c49d87934ad12e94039ee59f7a000f82a71f775b -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/json.rs -SPDXID: SPDXRef-62bef916fc2e3aca8bc6f2582e01a3cb -FileChecksum: SHA1: bc22290cd59cbca72ebc4d7ad42101f851fb188d -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/kotlin.rs -SPDXID: SPDXRef-9242e18eec18fbbe46872b994d521352 -FileChecksum: SHA1: 3b303f089d0c743802c0e0a653468d17ed6d4e75 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/lib.rs -SPDXID: SPDXRef-1f3ad30e3477c1e63ce14c2b0491f134 -FileChecksum: SHA1: d8e20e50f91cb1a6792af5c4bfc2ba478303cd5b -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/language/src/lua.rs -SPDXID: SPDXRef-4679e2fb2736dfbe261dbc7014925aaa -FileChecksum: SHA1: 7384ddc3ee2af2941fcd15c6a7a7fee662ba10c5 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/parsers.rs -SPDXID: SPDXRef-9f316d46be3893ad33252a9f85f0cd69 -FileChecksum: SHA1: a163dc163c7c86ad7514e5202d3c30cddd2a77e4 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/php.rs -SPDXID: SPDXRef-1a0b0ab05a8a32eb52ae983d7993792b -FileChecksum: SHA1: 864c85dc910714892ed2e4f060697ec4808cb6c3 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/profiling.rs -SPDXID: SPDXRef-03dbb7df859510a45820e8afcb4db8b8 -FileChecksum: SHA1: 6d7bb03592543231f225f031b6916621386e9f97 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/language/src/python.rs -SPDXID: SPDXRef-cc74a7e5dcaa4e3e6e59af5747b774c8 -FileChecksum: SHA1: 64a41c9190f076c447a9fa714bb7c5f11131645d -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/ruby.rs -SPDXID: SPDXRef-5f68a60d241551b9478e8da4e1947f32 -FileChecksum: SHA1: d8216d76760e924e5cd2154c4693b6fb53bd0262 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/rust.rs -SPDXID: SPDXRef-5cf6bde490d11af95e99fe591950a539 -FileChecksum: SHA1: bbab11a186618a91081d7dfe3236535fce3e5658 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/scala.rs -SPDXID: SPDXRef-4e5549d97f78322abe3bf02fa034442f -FileChecksum: SHA1: ff26a8d00beeeb1f0a1c5bf36c2b338215743e60 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/language/src/swift.rs -SPDXID: SPDXRef-88ddbbb712f12d1ad192aee361c9f00b -FileChecksum: SHA1: 6da351622739120e60c7d3fa2bde984ea67d3a5e -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/language/src/yaml.rs -SPDXID: SPDXRef-c2ece0178066fe3eb3590b9967240cc9 -FileChecksum: SHA1: 87cccb3eb69636563e81e616d5f1244247e65813 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/Cargo.toml -SPDXID: SPDXRef-4f4f1b13f17307595fe3fe7d84552320 -FileChecksum: SHA1: 940d954b44ec7580dd82ae67fae05f81af8b62a4 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/VENDORED.md -SPDXID: SPDXRef-c63352adae2e4c79a3b366b724c62636 -FileChecksum: SHA1: baed3674028cf40f19e04ecf0f8c3db3f6eebc08 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/benches/README.md -SPDXID: SPDXRef-44e3265b570a8ea06c6de3c1c19a88e8 -FileChecksum: SHA1: 1225527dd65a326592c10946e4ecbe10b92a5831 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/benches/ast_grep_comparison.rs -SPDXID: SPDXRef-f0bcf2e352906ea7e459f2904978e7e1 -FileChecksum: SHA1: 892939018a7d87efac60d0a08f5ecaf49da8570c -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/benches/comparison_benchmarks.rs -SPDXID: SPDXRef-5d82d5b7b5fcb8e574ff8f9e9f6cd950 -FileChecksum: SHA1: 33bb7572656b5ac35e73c90543adbebddd4632dc -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/benches/rule.yml -SPDXID: SPDXRef-72421e01ae5225be857aec8880eff7ff -FileChecksum: SHA1: 5e67b80b45f899ff3788cb5897c1d73c70a0666c -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/benches/rule_engine_benchmarks.rs -SPDXID: SPDXRef-4344b19edab200ad510e59f6f65d9e67 -FileChecksum: SHA1: 714042067ff7314471d9c5755e60be86e0beef0f -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/benches/simple_benchmarks.rs -SPDXID: SPDXRef-6b093b0c4568d88fbe435cb2d5f8a6cf -FileChecksum: SHA1: a54f0749f0583d632572fea1c9b6455184aea0b0 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/serialization_analysis/README_SERIALIZATION_ANALYSIS.md -SPDXID: SPDXRef-b4984edc7c211bd6548a399e64225b7c -FileChecksum: SHA1: fbee6747ceaabff74d7f889bcf034c04d1e4aea3 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/serialization_analysis/SERIALIZATION_ANALYSIS_REPORT.md -SPDXID: SPDXRef-69b1eaf75e7587a9ff31c790e499773f -FileChecksum: SHA1: 748032a5d6360a760aa20288b339256ce10f8c4a -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/serialization_analysis/analyze_serialization.rs -SPDXID: SPDXRef-8c69dae73521d0cae6103c91eca41537 -FileChecksum: SHA1: 6800ac1f36fe9fd1c362e8120fa68537e78f5fc4 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/serialization_analysis/separation_helper.sh -SPDXID: SPDXRef-78ba32a9a61842d00cd60c7aa7b53870 -FileChecksum: SHA1: 96bf1a6a35c1dbbf1004fc3822fb7afb0cd11ad1 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/serialization_analysis/serialization_analysis.yml -SPDXID: SPDXRef-d84cd3748a1af00f29c0574be5dbf285 -FileChecksum: SHA1: 8207a9d42e00fb6879ddb1903a048fcff095f570 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/serialization_analysis/serialization_analysis_report.md -SPDXID: SPDXRef-84b279b1a28794e6812ca95ba3c3b32b -FileChecksum: SHA1: 634c2ec44bfddca7ae800e3546e82166f77b5c84 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/src/check_var.rs -SPDXID: SPDXRef-755c900c2c113574ce12387662134517 -FileChecksum: SHA1: 3098b53eb8d5c7354ca25d7b1be92e13d1a8ed80 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/combined.rs -SPDXID: SPDXRef-5b1d40119bd22e0bdda07ff21d0bfcb4 -FileChecksum: SHA1: e4a6fe5eb3ffee7a9479ba5d894dc87731c1d7a1 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/fixer.rs -SPDXID: SPDXRef-71503a120709919546f858fabb02fef5 -FileChecksum: SHA1: 07dbdedfc81ad0c4df220c8c98b0bff4cd7abfe6 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/label.rs -SPDXID: SPDXRef-3b391b3b3ace0233c17487d0c8c59bc3 -FileChecksum: SHA1: 1d7f4a305c09ec5a6aba6e0afda7d7dbbe0770c8 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/src/lib.rs -SPDXID: SPDXRef-4ff01a81cb40fa09167b85fcdb7d717c -FileChecksum: SHA1: 4f896496007a9ca62f4fde23d258e688cf91103e -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/maybe.rs -SPDXID: SPDXRef-aaeebd3424e75edc252e0fc0f9c40357 -FileChecksum: SHA1: 68c65672b6ac761ebca03d7eed41f54651222477 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule/deserialize_env.rs -SPDXID: SPDXRef-16c322de0974dc8ede200f764998850d -FileChecksum: SHA1: 93c3b5b7dbc2f0c6dcd647206916c13e9294d660 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule/mod.rs -SPDXID: SPDXRef-c8716e9c443b191bdda41f18123231bc -FileChecksum: SHA1: f0d36f0f03354b98c229b598e5be2f098ab5f488 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/src/rule/nth_child.rs -SPDXID: SPDXRef-c20e2a59852c3e934e7c5219f37f164d -FileChecksum: SHA1: dd7fd3b130a2e7ecf73e293d2cd89df9effee25b -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule/range.rs -SPDXID: SPDXRef-84c5799fc4644e42e9377609b1a0d8ba -FileChecksum: SHA1: 583ebcd5014fbf31e3d7d04ac39c63d10467fd40 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule/referent_rule.rs -SPDXID: SPDXRef-d4e5aee67a46f52b20e9ddc3cbf7f8a1 -FileChecksum: SHA1: 3410d941b6caf35ae06bbceee30aeb8cf6a5fb11 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule/relational_rule.rs -SPDXID: SPDXRef-66c8e5b8b74181dab0efa93fedf04775 -FileChecksum: SHA1: bba0c9184158934a041ef3e5bfd33b4b97ca5a03 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/src/rule/stop_by.rs -SPDXID: SPDXRef-3e20833918d83e1501367c81f699cd28 -FileChecksum: SHA1: 6a6725ef5b74d5f31c7d31472f5670328b6a5f68 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule_collection.rs -SPDXID: SPDXRef-43b6916f4130a2f307648cbd8780c6ca -FileChecksum: SHA1: 4c76b67eb0864f671163b577a8adbc8465f0b329 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule_config.rs -SPDXID: SPDXRef-2a1b8523c7ed302d1ae757565e9833ba -FileChecksum: SHA1: 67f9fd8817f84d7bb3728efddcd3ce073f313d87 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/rule_core.rs -SPDXID: SPDXRef-f8fc8dfa9cd986e616d1261fa6e3b60b -FileChecksum: SHA1: 9b25ea6eaa10a0daf31a2a580ee17cba89d1edba -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/src/transform/mod.rs -SPDXID: SPDXRef-78101a273943c2ee663817abf1cec511 -FileChecksum: SHA1: 3c419cb6a73997b0655c8d461d5ddf2d024f8781 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/transform/parse.rs -SPDXID: SPDXRef-9fb9f55a41c065aac1ce6ce1c46a6548 -FileChecksum: SHA1: c393f511bc1aa4efd4713751b74c96c64f8bc95e -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/transform/rewrite.rs -SPDXID: SPDXRef-46d9043e3c5e09e750583361293dc3e3 -FileChecksum: SHA1: 171e04949d7ca93d3775813351f47bb760e0740e -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/src/transform/string_case.rs -SPDXID: SPDXRef-387aea8823d88cdf386814687489a8a9 -FileChecksum: SHA1: 92daabff413359fe9e0ce87049bbbdd63f98c918 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/rule-engine/src/transform/trans.rs -SPDXID: SPDXRef-67c0c3ec0d27bbefa006f4f6b4435aaa -FileChecksum: SHA1: 413ec7958fc17b88173eab82b74cd9e0b4f3206a -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/test_data/sample_javascript.js -SPDXID: SPDXRef-2ec964f49a3ef9ff1163a8f86f6abd52 -FileChecksum: SHA1: d854ee84a37715307da6c512a34befb6e5476aad -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/test_data/sample_python.py -SPDXID: SPDXRef-8f82799968b369568ab71617b47acae1 -FileChecksum: SHA1: 1f864a1e0c3abf6f98819594e3a1013d4409e671 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/test_data/sample_rust.rs -SPDXID: SPDXRef-1186548580f204803d45c200407cf83e -FileChecksum: SHA1: a50d6b36c13cdb2048f21dece72581b9a13bd957 -LicenseConcluded: AGPL-3.0-or-later AND MIT -LicenseInfoInFile: AGPL-3.0-or-later -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/rule-engine/test_data/sample_typescript.ts -SPDXID: SPDXRef-fbc59c602fbeb91a3ad6eb814d67fcbe -FileChecksum: SHA1: 1c9a937682da21649671c999cce7c17154fdef45 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/services/Cargo.toml -SPDXID: SPDXRef-ab578ef52433772de1a1ac40c24c5dd7 -FileChecksum: SHA1: b3d9cdc7bed48fef1637fb679b7ef8752cf724fe -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/services/README.md -SPDXID: SPDXRef-7f63724cdb7306f17cb7ebb13b9696cf -FileChecksum: SHA1: 435d13dbfedf1660d5d2ec6dc842517e79247876 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/services/src/lib.rs -SPDXID: SPDXRef-2f42843e8bad608efebd3fe792506733 -FileChecksum: SHA1: 27ad6389ddf9c9d2c5f18b2bc301bb5cf7ea553e -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/utils/Cargo.toml -SPDXID: SPDXRef-cf45e0b9fb205344b522fc26a8298235 -FileChecksum: SHA1: 914a2208eed4703895b2f6faf0e1fa88aac938a0 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/utils/README.md -SPDXID: SPDXRef-5ee3f66b8956f2ce1eff477aa68edd88 -FileChecksum: SHA1: fb4293c22fd8b65392c8d505673b5b2728023fda -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/utils/src/hash_help.rs -SPDXID: SPDXRef-d979566a50fe94181ba534401c2c62b1 -FileChecksum: SHA1: ba564844435251db7a3f8f818f816f9b6ffbc7e9 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/utils/src/lib.rs -SPDXID: SPDXRef-3c88c068e0fd6eb9b98ec09e04413a5b -FileChecksum: SHA1: fcda5df421f821dc6d46767a5a87d68b14d1bda7 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/utils/src/simd.rs -SPDXID: SPDXRef-4e5f3eee8ceebc5edde78a3911cfdb49 -FileChecksum: SHA1: 1b2f5b34a284a426f86e155ff09988f369bd65b8 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/.appveyor.yml -SPDXID: SPDXRef-402f57903a72a929777f9ecb50757632 -FileChecksum: SHA1: b93862a2f2a88005e06f200aae67b526c84c670a -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/.gitignore -SPDXID: SPDXRef-7b2801debffe14ddad59a2b912c1b3d1 -FileChecksum: SHA1: 46315b956812e71a0f7525ee2e5e948647733f0d -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/.travis.yml -SPDXID: SPDXRef-c4c348a439df60cdf4aff38a25210bc1 -FileChecksum: SHA1: bba68b8c29c494b923867326281d6733d3c13c32 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/Cargo.toml -SPDXID: SPDXRef-a25112f597e8755c458099641b73386d -FileChecksum: SHA1: 89f7e275a698f4d680c536bb6e4b51b3ba201999 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./crates/wasm/README.md -SPDXID: SPDXRef-a52bee01e6a3136dfb1aa4c5801ca671 -FileChecksum: SHA1: f83292f24277ad935144fca7e45deff9127fcc7f -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/src/lib.rs -SPDXID: SPDXRef-cd66635fb95e3bdc6e6138501afb03cc -FileChecksum: SHA1: ea3e48f14cd350c8deec7b46cbfc1089d23a694d -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/src/utils.rs -SPDXID: SPDXRef-ba1acab5d40bff5ac621372263b6d331 -FileChecksum: SHA1: a289cd51675dd0c9d5619eb8dd776d23bb572164 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./crates/wasm/tests/web.rs -SPDXID: SPDXRef-b6ae9ad907495912115b7c8f9d53809e -FileChecksum: SHA1: f3c60e53827ea1d1b31dfd8b0f9849aaf7018597 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./deny.toml -SPDXID: SPDXRef-2c0d6c272508977525c03e1c21ab05dc -FileChecksum: SHA1: 4ff7f872f977d2215a579a962eb6b24f6384de95 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./hk.pkl -SPDXID: SPDXRef-a2d59c457404f3e2c7adf8057a6e3767 -FileChecksum: SHA1: 243a977f13a98def08e0fdd6c6eb33308fae4db4 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./info/Pattern.md -SPDXID: SPDXRef-9f2ead9ce46a115b43f8e59e3f8daf88 -FileChecksum: SHA1: c2090d02a46224575e44348c39e40b5f34f24306 -LicenseConcluded: MIT -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2023 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> - -FileName: ./info/ag-instruct.md -SPDXID: SPDXRef-7d9220e1bfa8d6cd26e5486c4d0116d1 -FileChecksum: SHA1: ede6772a521abd329f6efa48f9c56fbc445e15cf -LicenseConcluded: MIT -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2022 Herrington Darkholme <2883231+HerringtonDarkholme@users.noreply.github.com> -SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./mise.toml -SPDXID: SPDXRef-2b3c6dc79aaa8ab187f18575424cec72 -FileChecksum: SHA1: dbf5723e981b595202ce4b016878dd2080cc308f -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./scripts/get-langs.sh -SPDXID: SPDXRef-b0c7afe8a516b792025a21fac26f330d -FileChecksum: SHA1: ad10a77ef3d92a5b7458d768e4b56ca28a932079 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./scripts/install-mise.sh -SPDXID: SPDXRef-00ab9cf365c27b94b2da4a73ab9274ba -FileChecksum: SHA1: a85786553578e90814aaf33e8a8ce24815f7bcd6 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./scripts/update-licenses.py -SPDXID: SPDXRef-fd90e7c132390ec22cac057ad5f86804 -FileChecksum: SHA1: beb16286a14d168af6112cd3a4d41718620657d1 -LicenseConcluded: AGPL-3.0-or-later -LicenseInfoInFile: AGPL-3.0-or-later -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. 
- -FileName: ./xtask/Cargo.toml -SPDXID: SPDXRef-2a14fe7d658a46cff7436cfe88998325 -FileChecksum: SHA1: ba0dcaaef669d3e8dc7accbdfdc08ed66d148362 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./xtask/README.md -SPDXID: SPDXRef-4939545db8b1a8a0b923d19c81ab970d -FileChecksum: SHA1: 4a26700a3c5803115c3c3ef82f46454141948047 -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - -FileName: ./xtask/src/main.rs -SPDXID: SPDXRef-29201f21040fd5b280c7cd4c8a504dda -FileChecksum: SHA1: 8e1c00c48a78fe252bd804ced5afbc0a80a0f07b -LicenseConcluded: Apache-2.0 OR MIT -LicenseInfoInFile: Apache-2.0 -LicenseInfoInFile: MIT -FileCopyrightText: SPDX-FileCopyrightText: 2025 Knitli Inc. - diff --git a/specs/001-realtime-code-graph/RESEARCH_SUMMARY.md b/specs/001-realtime-code-graph/RESEARCH_SUMMARY.md index 4584dd3..7b21d89 100644 --- a/specs/001-realtime-code-graph/RESEARCH_SUMMARY.md +++ b/specs/001-realtime-code-graph/RESEARCH_SUMMARY.md @@ -17,10 +17,13 @@ SPDX-License-Identifier: AGPL-3.0-or-later ## Quick Findings ### The Question + **How can ReCoco's native provenance tracking enhance FR-014 ("System MUST track analysis provenance...") compared to T079's current "repository_id only" approach?** ### The Answer + **ReCoco (Thread's Rust-only CocoIndex fork) provides sophisticated automatic lineage tracking that captures:** + 1. βœ“ Source versions (Git commits, S3 ETags, timestamps) 2. βœ“ Transformation pipeline (which analysis stages ran) 3. 
βœ“ Cache status (hit/miss for each operation) @@ -31,6 +34,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later **T079 Enhanced Scope**: Full provenance leveraging ReCoco ### The Opportunity + **Current T079 misses 80% of valuable provenance data** that ReCoco provides automatically **Note**: Basic content addressing (Blake3 fingerprinting) IS already implemented in thread-flow via `AnalysisDefFingerprint`. The gap is the *rich* provenance (source version, pipeline lineage, cache metadata, upstream hashes). @@ -43,6 +47,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later ### What is Provenance Tracking? Provenance = Understanding the complete "history" of data: + - "Where did this node come from?" - "When was it analyzed?" - "Which stages created it?" @@ -57,6 +62,7 @@ Provenance = Understanding the complete "history" of data: **Missing**: Version and timestamp (incomplete FR-014 implementation) **ReCoco Provides**: + - Data source βœ“ - Version (Git commit, S3 ETag) βœ“ - Timestamp (when accessed) βœ“ @@ -69,7 +75,8 @@ Provenance = Understanding the complete "history" of data: ### 1. ReCoco Architecture Supports Provenance **Dataflow Structure**: -``` + +```plaintext Source β†’ Parse β†’ Extract β†’ RuleMatch β†’ BuildGraph β†’ Target ↓ ↓ ↓ ↓ ↓ ↓ Track Track Track Track Track Track @@ -78,6 +85,7 @@ version output output output output time ``` **At Each Stage**: + - Input hash (what was processed) - Output hash (what was produced) - Execution time (how long) @@ -85,6 +93,7 @@ version output output output output time - Operation type and version **Already Implemented in thread-flow**: + ```rust // crates/flow/src/incremental/types.rs pub struct AnalysisDefFingerprint { @@ -92,11 +101,13 @@ pub struct AnalysisDefFingerprint { pub source_file: PathBuf, } ``` + Basic source file tracking βœ… and content hashing βœ… are operational. ### 2. 
Current T079 Scope Gap **What thread-flow Already Has** (basic provenance): + ```rust pub fingerprint: Fingerprint, // βœ“ Blake3 hash of file content pub source_file: PathBuf, // βœ“ Source file path @@ -104,6 +115,7 @@ pub source_file: PathBuf, // βœ“ Source file path ``` **What's Still Missing** (expanded T079): + ```rust pub source_version: SourceVersion, // βœ— Git commit, timestamp pub analysis_lineage: Vec, // βœ— Which stages ran @@ -115,7 +127,7 @@ pub upstream_hashes: Vec, // βœ— Upstream data ### 3. Advantages of Enhanced Provenance | Feature | Value | Impact | -|---------|-------|--------| +| --------- | ------- | -------- | | **Source Version** | Know exact Git commit | Can trace to code review | | **Timestamps** | Know when analyzed | Detect stale analysis | | **Pipeline Tracking** | Know which tiers ran | Debug conflict detection | @@ -125,14 +137,17 @@ pub upstream_hashes: Vec, // βœ— Upstream data ### 4. Enables Better Compliance **FR-014 Requirement**: Data source, version, timestamp + - Current T079: βœ— Missing version and timestamp - Enhanced T079: βœ“ Complete implementation **FR-018 Requirement**: Audit logs for conflicts + - Current: βœ— Can't trace why conflict detected - Enhanced: βœ“ Full tier-by-tier analysis recorded **SC-CACHE-001**: >90% cache hit rate + - Current: βœ— Can't verify cache working - Enhanced: βœ“ Cache metadata proves effectiveness @@ -157,6 +172,7 @@ ExecutionRecord { ``` **How Thread Uses It**: + ```rust // Tier 1 AST diff thread_parse operator executes (crates/flow/src/functions/parse.rs) @@ -173,6 +189,7 @@ node_provenance = [parse_record, extract_record, ...] 
### Data Model **Enhanced GraphNode**: + ```rust pub struct GraphNode { pub id: NodeId, @@ -196,6 +213,7 @@ pub struct GraphNode { **Recommended**: "Implement comprehensive provenance tracking leveraging ReCoco" **Why**: + - Same implementation effort with ReCoco data - Prevents rework and schema changes later - Fully complies with FR-014 and FR-018 @@ -205,31 +223,37 @@ pub struct GraphNode { ### 2. Phased Implementation **Phase 1 (Week 1)**: Define provenance types + - `SourceVersion`, `LineageRecord`, `EdgeCreationMethod` - Update `GraphNode` and `GraphEdge` structures **Phase 2 (Week 2-3)**: Storage and persistence + - Create provenance tables (Postgres/D1) - Implement storage abstraction **Phase 3 (Week 4)**: ReCoco integration + - Build `ProvenanceCollector` to extract ExecutionRecords - Wire into dataflow execution via ThreadFlowBuilder - Note: ThreadFlowBuilder and ReCoco operators already exist in thread-flow; this extends them to capture and persist ExecutionRecord metadata **Phase 4 (Week 5)**: APIs and validation + - Implement `ProvenanceQuery` API - Build debugging tools ### 3. 
Backward Compatibility **Approach**: Optional fields initially + - Existing nodes continue working - New analyses get full provenance - Lazy migration of old data **No Breaking Changes**: + ```rust pub source_version: Option, // Optional pub analysis_lineage: Option>, // Optional @@ -247,7 +271,7 @@ pub analysis_lineage: Option>, // Optional ## Missed Opportunities (Current T079) | Opportunity | ReCoco Provides | T079 Status | Loss | -|---|---|---|---| +| --- | --- | --- | --- | | Source Version Tracking | Git commit, S3 ETag | βœ— Missing | Can't verify freshness | | Timestamp Precision | Per-operation times | βœ— Missing | Can't detect staleness | | Conflict Audit Trail | Tier execution records | βœ— Missing | Can't debug conflicts | @@ -260,16 +284,19 @@ pub analysis_lineage: Option>, // Optional ## Implementation Effort ### Time Estimate + - **Low**: 25 hours (1 week implementation) - **High**: 35 hours (with comprehensive testing) - **Recommended**: 30 hours (1 week + validation) ### Complexity + - **Moderate**: Adding new types and database tables - **Straightforward**: ReCoco handles data collection - **No**: Complex algorithms needed ### Risk + - **Low**: Backward compatible with optional fields - **Low**: ReCoco API is stable (core concept) - **Mitigated**: Phased rollout strategy @@ -279,9 +306,11 @@ pub analysis_lineage: Option>, // Optional ## What Gets Enabled ### Debugging Conflict Detection + **Question**: "Why was this conflict detected?" 
**Answer** (with enhanced provenance): -``` + +```plaintext Conflict "function signature changed" detected 2026-01-11T10:30:15Z Tier 1 (AST diff): Found signature change in 15ms (confidence: 0.6) Tier 2 (Semantic): Type incompatibility confirmed in 450ms (confidence: 0.85) @@ -290,12 +319,15 @@ Final confidence: 0.95 (Tier 3 validated) ``` ### Incremental Update Optimization + **Upstream change detected**: File X hash changed **With provenance**: Find all nodes where `upstream_hashes` contains old file hash **Result**: Only re-analyze affected nodes, cache hits for everything else ### Audit and Compliance + **FR-018** (log conflicts): Complete record of: + - What was analyzed - When - Which stages ran @@ -307,19 +339,25 @@ Final confidence: 0.95 (Tier 3 validated) ## How to Use These Documents ### PROVENANCE_RESEARCH_REPORT.md + **Comprehensive deep-dive** (30+ pages) + - For: Technical leads, researchers, architects - Contains: Full analysis, trade-offs, architectural patterns - Use: Understanding complete context ### PROVENANCE_ENHANCEMENT_SPEC.md + **Implementation specification** (20+ pages) + - For: Developers implementing T079 - Contains: Code structures, migrations, task breakdown - Use: Direct implementation guidance ### RESEARCH_SUMMARY.md (this document) + **Quick reference** (5 pages) + - For: Decision makers, stakeholders, reviewers - Contains: Key findings, recommendations, effort estimate - Use: Understanding core insights @@ -356,16 +394,19 @@ Final confidence: 0.95 (Tier 3 validated) ## Files Provided ### 1. PROVENANCE_RESEARCH_REPORT.md + - **Size**: ~40 pages - **Content**: Complete research with analysis, comparisons, recommendations - **Audience**: Technical audience ### 2. PROVENANCE_ENHANCEMENT_SPEC.md + - **Size**: ~30 pages - **Content**: Implementation specification with code structures and tasks - **Audience**: Implementation team ### 3. 
RESEARCH_SUMMARY.md (this file) + - **Size**: ~10 pages - **Content**: Executive summary with key findings - **Audience**: Decision makers @@ -375,7 +416,9 @@ Final confidence: 0.95 (Tier 3 validated) ## Questions & Discussion ### Q: Why not just stick with T079 as-is (repository_id)? + **A**: Because: + 1. Incomplete FR-014 implementation (missing version, timestamp) 2. Can't debug why conflicts were detected (FR-018) 3. Can't verify cache is working (SC-CACHE-001) @@ -383,21 +426,27 @@ Final confidence: 0.95 (Tier 3 validated) 5. ReCoco provides data automatically (minimal extra effort); ThreadFlowBuilder operators already capture execution metadata ### Q: Isn't this a lot of extra work? + **A**: No, because: + 1. ReCoco provides data automatically (we don't build it) 2. Effort is organizing/storing/querying existing data 3. Better to do once comprehensively than piecemeal 4. Phased approach spreads effort over 1+ weeks ### Q: What if ReCoco changes its API? + **A**: Very low risk because: + 1. **Thread controls ReCoco** β€” it's our own fork (separate public crate maintained by Thread) 2. Any required API changes can be implemented directly in ReCoco without waiting 3. The bridge/adapter layer in thread-flow (bridge.rs) isolates changes 4. Worst case: lose detailed provenance, keep basic fingerprinting (which already exists) ### Q: Can we do this incrementally? + **A**: Yes: + 1. Phase 1: Types and schema (no functional change) 2. Phase 2: Storage (still no change) 3. 
Phase 3: Collection (data starts flowing) @@ -410,6 +459,7 @@ Final confidence: 0.95 (Tier 3 validated) **Status**: Complete (2026-02-24) The CodeWeaver semantic classification analysis produced pre-baked classification data now located at `/home/knitli/thread/classifications/`: + - `_universal_rules.json`: 2,444 exact + 21 majority cross-language patterns - `_categories.json`: 55 category β†’ SemanticClass mappings (one wrapper key stripped during migration) - `_scoring.json`: Per-class ImportanceScores + AgentTask profiles diff --git a/specs/001-realtime-code-graph/checklists/requirements.md b/specs/001-realtime-code-graph/checklists/requirements.md index 8c19d0a..0e92125 100644 --- a/specs/001-realtime-code-graph/checklists/requirements.md +++ b/specs/001-realtime-code-graph/checklists/requirements.md @@ -42,12 +42,14 @@ SPDX-License-Identifier: AGPL-3.0-or-later **Status**: βœ… PASSED - Specification is complete and ready for planning phase **Strengths**: + - Zero clarification markers needed (intelligent defaults documented in Assumptions) - Comprehensive service architecture criteria meeting constitutional requirements - Clear priority-based user story progression (P1-P4) - Well-bounded scope with explicit scalability targets **Next Steps**: + - Ready for `/speckit.plan` to generate implementation plan - Consider `/speckit.clarify` only if additional stakeholder input needed on documented assumptions diff --git a/specs/001-realtime-code-graph/contracts/rpc-types.rs b/specs/001-realtime-code-graph/contracts/rpc-types.rs index 9a7dfd3..4bee466 100644 --- a/specs/001-realtime-code-graph/contracts/rpc-types.rs +++ b/specs/001-realtime-code-graph/contracts/rpc-types.rs @@ -6,15 +6,19 @@ //! RPC Type Definitions for Real-Time Code Graph Intelligence //! //! These types are shared across CLI and Edge deployments for API consistency. -//! Serialization uses `serde` + `postcard` for binary efficiency (~40% size reduction vs JSON). +//! 
This file contains draft Rust structs as the precursor to `.proto` definitions (T017). +//! +//! **Wire Protocol**: +//! - External API (HTTP POST): `prost` Protobuf encoding β€” see `crates/thread-api/proto/v1/` (planned, T017) +//! - Internal Rust-to-Rust (Workerβ†’Container, CLI internal): `postcard` binary +//! - MCP server (future): `serde_json` / JSON-RPC 2.0 (separate adapter layer) //! //! **Protocol**: Custom RPC over HTTP + WebSockets (gRPC not viable for Cloudflare Workers) //! **Transport**: HTTP POST for request/response, WebSocket for real-time streaming -//! **Serialization**: postcard (binary) for production, JSON for debugging use serde::{Deserialize, Serialize}; -use std::collections::HashMap; use std::path::PathBuf; +use thread_utils::RapidMap; // ============================================================================ // Core RPC Trait @@ -32,10 +36,10 @@ pub trait CodeAnalysisRpc { async fn query_graph(&self, req: GraphQueryRequest) -> Result; /// Search for similar code patterns (semantic search) - async fn search_similar(&self, req: SimilaritySearchRequest) -> Result; - - /// Detect conflicts between code changes - async fn detect_conflicts(&self, req: ConflictDetectionRequest) -> Result; + async fn search_similar( + &self, + req: SimilaritySearchRequest, + ) -> Result; /// Get analysis session status async fn get_session_status(&self, session_id: String) -> Result; @@ -45,6 +49,20 @@ pub trait CodeAnalysisRpc { async fn subscribe_updates(&self, repo_id: String) -> Result; } +/// Extension trait for conflict detection capabilities (commercial scope). +/// +/// Implemented by commercial `thread-conflict` service; NOT implemented in OSS. +/// OSS callers should check for this trait via dynamic dispatch if needed. +/// The OSS `thread-conflict` stub crate does not implement this trait. 
+#[async_trait::async_trait] +pub trait ConflictAwareRpc: CodeAnalysisRpc { + /// Detect conflicts between code changes (multi-tier progressive) + async fn detect_conflicts( + &self, + req: ConflictDetectionRequest, + ) -> Result; +} + // ============================================================================ // Request/Response Types // ============================================================================ @@ -73,14 +91,18 @@ pub struct GraphQueryRequest { pub node_id: String, pub max_depth: Option, pub edge_types: Vec, + /// Whether to include local uncommitted delta in query results. + /// Defaults to `true` (merged Base + Delta view). + /// Pass `false` to query the committed Base Layer only. + pub include_local_delta: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] pub enum GraphQueryType { - Dependencies, // What does this symbol depend on? - Callers, // Who calls this function? - Callees, // What does this function call? - ReverseDependencies, // Who depends on this? + Dependencies, // What does this symbol depend on? + Callers, // Who calls this function? + Callees, // What does this function call? + ReverseDependencies, // Who depends on this? 
PathBetween { target_id: String }, // Find path between two symbols } @@ -128,16 +150,16 @@ pub struct ConflictDetectionRequest { #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub enum DetectionTier { - Tier1AST, // Fast AST diff (<100ms) - Tier2Semantic, // Semantic analysis (<1s) - Tier3GraphImpact, // Graph impact (<5s) + Tier1AST, // Fast AST diff (<100ms) + Tier2Semantic, // Semantic analysis (<1s) + Tier3GraphImpact, // Graph impact (<5s) } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ConflictDetectionResponse { pub conflicts: Vec, pub total_time_ms: u64, - pub tier_timings: HashMap, // Tier name -> time in ms + pub tier_timings: RapidMap, // Tier name -> time in ms } /// Session status query @@ -173,7 +195,8 @@ pub struct UpdateSubscription { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GraphNode { pub id: String, - pub node_type: String, + pub semantic_class: String, // Language-agnostic SemanticClass (from thread-definitions, e.g. "DefinitionCallable") + pub node_kind: Option, // Raw tree-sitter node type (e.g., "function_item", "closure_expression") pub name: String, pub qualified_name: String, pub file_path: PathBuf, @@ -214,6 +237,18 @@ pub enum Severity { // WebSocket Message Types (Real-Time Updates) // ============================================================================ +/// Status of a conflict detection tier result. +/// +/// **Deferred-completion pattern**: If a tier times out (`Timeout, is_final: false`), the server +/// queues the analysis for retry. When the retry succeeds, a new `ConflictUpdate` is sent for the +/// same `conflict_id` and `tier` with `status: Complete`. Clients merge by `(conflict_id, tier)`; +/// the most recently received message wins. No explicit supersession signal is needed. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub enum ConflictUpdateStatus { + Complete, // Tier analysis completed successfully + Timeout, // Tier timed out. 
`is_final: true` = terminal, no retry queued; `is_final: false` = retry pending, a follow-up Complete message will arrive. +} + /// Messages sent over WebSocket for real-time updates #[derive(Debug, Clone, Serialize, Deserialize)] pub enum WebSocketMessage { @@ -228,6 +263,8 @@ pub enum WebSocketMessage { ConflictUpdate { conflict_id: String, tier: DetectionTier, + status: ConflictUpdateStatus, // Outcome of this tier's analysis + is_final: bool, // When true, no further ConflictUpdate messages follow for this conflict_id conflicts: Vec, timestamp: i64, }, @@ -258,6 +295,11 @@ pub enum WebSocketMessage { /// Error notification Error { code: String, message: String }, + + /// Client requests replay of missed messages after reconnect (Client β†’ Server) + RequestMissedUpdates { + since_timestamp: i64, // Unix timestamp of last received message before disconnection + }, } // ============================================================================ @@ -294,6 +336,8 @@ impl std::error::Error for RpcError {} // Serialization Helpers // ============================================================================ +// NOTE: serialize_binary/deserialize_binary use postcard β€” INTERNAL Rust-to-Rust communication only. +// External API wire format is prost Protobuf. See crates/thread-api/proto/v1/ (planned, T017). /// Serialize RPC request/response to binary (postcard) pub fn serialize_binary(value: &T) -> Result, postcard::Error> { postcard::to_allocvec(value) diff --git a/specs/001-realtime-code-graph/contracts/streaming-graph.md b/specs/001-realtime-code-graph/contracts/streaming-graph.md index 992f95e..ac75cf5 100644 --- a/specs/001-realtime-code-graph/contracts/streaming-graph.md +++ b/specs/001-realtime-code-graph/contracts/streaming-graph.md @@ -14,9 +14,10 @@ SPDX-License-Identifier: AGPL-3.0-or-later ## Core Principle **NEVER** load the full graph structure (`Graph`) into memory on the Edge. All graph operations must be: -1. 
**Lazy**: Fetch data only when requested. -2. **Streaming**: Process nodes/edges one by one or in small batches. -3. **Stateless**: Do not retain visited history in memory beyond the current traversal frontier. + +1. **Lazy**: Fetch data only when requested. +2. **Streaming**: Process nodes/edges one by one or in small batches. +3. **Stateless**: Do not retain visited history in memory beyond the current traversal frontier. ## Interface Definition @@ -76,5 +77,5 @@ async fn leads_to(&self, ancestor: &NodeId, descendant: &NodeId) -> Result ## Constraints -1. **Memory Cap**: The implementation MUST NOT buffer more than `batch_size` (default 100) items in memory. -2. **Recursion**: Recursive traversal algorithms (DFS/BFS) MUST be implemented iteratively using an external stack/queue stored in a Durable Object or handled via the Reachability Index, NOT via call-stack recursion. +1. **Memory Cap**: The implementation MUST NOT buffer more than `batch_size` (default 100) items in memory. +2. **Recursion**: Recursive traversal algorithms (DFS/BFS) MUST be implemented iteratively using an external stack/queue stored in a Durable Object or handled via the Reachability Index, NOT via call-stack recursion. 
diff --git a/specs/001-realtime-code-graph/contracts/websocket-protocol.md b/specs/001-realtime-code-graph/contracts/websocket-protocol.md index 63ef2d5..f03f3db 100644 --- a/specs/001-realtime-code-graph/contracts/websocket-protocol.md +++ b/specs/001-realtime-code-graph/contracts/websocket-protocol.md @@ -7,8 +7,8 @@ SPDX-License-Identifier: AGPL-3.0-or-later # WebSocket Protocol Specification -**Feature**: Real-Time Code Graph Intelligence -**Protocol Version**: 1.0 +**Feature**: Real-Time Code Graph Intelligence +**Protocol Version**: 1.0 **Last Updated**: 2026-01-11 ## Overview @@ -19,6 +19,9 @@ The WebSocket protocol enables real-time bidirectional communication between cli - Progressive conflict detection updates (Tier 1 β†’ Tier 2 β†’ Tier 3) - Live analysis session progress - Graph update streaming +- Missed-update replay on reconnect (`RequestMissedUpdates`) + +**Message Types**: CodeChangeDetected (Sβ†’C), ConflictUpdate (Sβ†’C), SessionProgress (Sβ†’C), GraphUpdate (Sβ†’C), Ping/Pong (keepalive), Error (Sβ†’C), RequestMissedUpdates (Cβ†’S) **Fallback Strategy**: WebSocket primary, Server-Sent Events (SSE) secondary, Long-Polling last resort @@ -84,6 +87,8 @@ For development/debugging, JSON serialization is supported: "type": "ConflictUpdate", "conflict_id": "conflict:abc123", "tier": "Tier1AST", + "status": "Complete", + "is_final": false, "conflicts": [...], "timestamp": 1704988800 } @@ -93,8 +98,8 @@ For development/debugging, JSON serialization is supported: ### 1. Code Change Detected -**Direction**: Server β†’ Client -**Trigger**: File change detected by indexer (file watcher or git poll) +**Direction**: Server β†’ Client +**Trigger**: File change detected by indexer (file watcher or git poll) **Latency Target**: <100ms from code change to client notification (FR-013) ```rust @@ -114,8 +119,8 @@ WebSocketMessage::CodeChangeDetected { ### 2. 
Conflict Update (Progressive) -**Direction**: Server β†’ Client -**Trigger**: Conflict detection tier completes +**Direction**: Server β†’ Client +**Trigger**: Conflict detection tier completes **Progressive Delivery**: Tier 1 (100ms) β†’ Tier 2 (1s) β†’ Tier 3 (5s) ```rust @@ -123,6 +128,8 @@ WebSocketMessage::CodeChangeDetected { WebSocketMessage::ConflictUpdate { conflict_id: "conflict:abc123".to_string(), tier: DetectionTier::Tier1AST, + status: ConflictUpdateStatus::Complete, + is_final: false, // Tier 2 and 3 still pending conflicts: vec![ Conflict { id: "conflict:abc123".to_string(), @@ -142,6 +149,8 @@ WebSocketMessage::ConflictUpdate { WebSocketMessage::ConflictUpdate { conflict_id: "conflict:abc123".to_string(), tier: DetectionTier::Tier2Semantic, + status: ConflictUpdateStatus::Complete, + is_final: false, // Tier 3 still pending conflicts: vec![ Conflict { id: "conflict:abc123".to_string(), @@ -161,6 +170,8 @@ WebSocketMessage::ConflictUpdate { WebSocketMessage::ConflictUpdate { conflict_id: "conflict:abc123".to_string(), tier: DetectionTier::Tier3GraphImpact, + status: ConflictUpdateStatus::Complete, + is_final: true, // Final tier β€” no further updates for this conflict_id conflicts: vec![ Conflict { id: "conflict:abc123".to_string(), @@ -177,17 +188,72 @@ WebSocketMessage::ConflictUpdate { } ``` -**Client UI Update**: +**Client UI Update**: 1. Show initial conflict immediately (Tier 1) 2. Refine details as Tier 2 completes (update confidence, severity) 3. Show comprehensive analysis when Tier 3 completes (final recommendation) +#### Tier Failure / Timeout + +If a tier fails to complete (analysis engine timeout, circuit breaker OPEN, engine crash), the server sends a `ConflictUpdate` with `status: Timeout`. Only `Timeout` paired with `is_final: true` is terminal (no further updates for this `conflict_id`). 
`Timeout` paired with `is_final: false` means a retry is queued and a follow-up `Complete` message will arrive: + +```rust +// Case 1: Timeout with no retry queued (terminal β€” is_final: true) +WebSocketMessage::ConflictUpdate { + conflict_id: "conflict:abc123".to_string(), + tier: DetectionTier::Tier2Semantic, + status: ConflictUpdateStatus::Timeout, + is_final: true, // No retry queued; this is the definitive result for this conflict_id + conflicts: vec![/* last known state from Tier 1 */], + timestamp: 1704988802, +} + +// Case 2: Timeout with retry queued (is_final: false β€” expect a follow-up Complete message) +WebSocketMessage::ConflictUpdate { + conflict_id: "conflict:abc123".to_string(), + tier: DetectionTier::Tier2Semantic, + status: ConflictUpdateStatus::Timeout, + is_final: false, // Retry queued; a subsequent Complete message will arrive for this (conflict_id, tier) + conflicts: vec![/* last known state from Tier 1 */], + timestamp: 1704988802, +} +// ... later, when the retry completes: +WebSocketMessage::ConflictUpdate { + conflict_id: "conflict:abc123".to_string(), + tier: DetectionTier::Tier2Semantic, + status: ConflictUpdateStatus::Complete, + is_final: false, // Tier 3 may still follow + conflicts: vec![/* full Tier 2 result */], + timestamp: 1704988815, +} +``` + +`is_final: true` signals the client that no further `ConflictUpdate` messages will follow for this `conflict_id`. The client should display the last known tier result as the definitive analysis. `is_final: false` with `status: Timeout` means a retry is queued; the client should keep the current result and apply any subsequent update for the same `(conflict_id, tier)` when it arrives (last-received-wins). + +**Tier failure does NOT generate a generic `Error` message.** Error messages are reserved for connection-level or session-level failures, not analysis tier failures. 
+ +#### ConflictUpdate Status and Finality Fields + +The `ConflictUpdate` message includes two new fields: + +```rust +pub enum ConflictUpdateStatus { + Complete, // Tier analysis completed successfully + Timeout, // Tier timed out. `is_final: true` if no retry is queued; `is_final: false` if retry is pending. +} +``` + +- `status: ConflictUpdateStatus` β€” indicates outcome of the tier analysis +- `is_final: bool` β€” when `true`, no further `ConflictUpdate` messages will be sent for this `conflict_id` + +**Deferred-completion (retry) pattern**: When a tier times out, the server sends `status: Timeout`. If a retry is queued, `is_final: false` is set β€” the client should keep the last known result and expect a follow-up. When the retry completes, a new `ConflictUpdate` arrives for the same `conflict_id` and `tier` with `status: Complete`. Clients always apply the most recently received message for a given `(conflict_id, tier)` pair β€” newer messages implicitly supersede older ones for the same pair. There is no explicit `Superseded` status: last-received-wins is sufficient for conflict update merging. + --- ### 3. Session Progress -**Direction**: Server β†’ Client -**Trigger**: Analysis session makes progress +**Direction**: Server β†’ Client +**Trigger**: Analysis session makes progress **Frequency**: Every 10% of files processed, or every 5 seconds ```rust @@ -205,8 +271,8 @@ WebSocketMessage::SessionProgress { ### 4. Graph Update -**Direction**: Server β†’ Client -**Trigger**: Incremental graph update completes (CocoIndex diff applied) +**Direction**: Server β†’ Client +**Trigger**: Incremental graph update completes (CocoIndex diff applied) **Latency Target**: <100ms from code change to graph update notification ```rust @@ -226,8 +292,8 @@ WebSocketMessage::GraphUpdate { ### 5. 
Heartbeat (Keep-Alive) -**Direction**: Server β†’ Client (Ping), Client β†’ Server (Pong) -**Frequency**: Every 30 seconds +**Direction**: Server β†’ Client (Ping), Client β†’ Server (Pong) +**Frequency**: Every 30 seconds **Purpose**: Keep WebSocket connection alive, detect disconnections ```rust @@ -238,13 +304,13 @@ WebSocketMessage::Ping { timestamp: 1704988800 } WebSocketMessage::Pong { timestamp: 1704988800 } ``` -**Timeout**: If no Pong received within 60 seconds, server closes connection +**Timeout**: If no Pong received within 90 seconds (3 Γ— ping interval, configurable), server closes connection. The default of 3 Γ— ping interval provides resilience against single dropped packets and high-latency edge clients while remaining responsive to genuine disconnections. --- ### 6. Error Notification -**Direction**: Server β†’ Client +**Direction**: Server β†’ Client **Trigger**: Error during analysis, storage, or processing ```rust @@ -258,6 +324,34 @@ WebSocketMessage::Error { --- +### 7. Request Missed Updates (Client β†’ Server) + +**Direction**: Client β†’ Server +**Trigger**: Client reconnects after disconnection and requests replay of messages missed during the outage +**Use Case**: Ensures no conflict updates, graph changes, or session progress events are silently lost during network interruption + +```rust +WebSocketMessage::RequestMissedUpdates { + since_timestamp: 1704988750, // Unix timestamp of last received message +} +``` + +**Server Response**: Server replays all messages with `timestamp > since_timestamp` from the replay buffer, in chronological order, followed by a synthetic `SessionProgress` message indicating replay is complete. 
+ +**Replay Buffer Limits**: +- Retention period: 5 minutes of messages retained per repository connection +- Maximum replay batch: 500 messages per reconnect request +- If `since_timestamp` is older than the retention window, server responds with `Error { code: "REPLAY_EXPIRED", message: "Reconnect gap exceeds 5-minute replay window; full re-sync required" }` + +**Deployment behavior**: +- **Commercial edge** (Durable Objects): Replay buffer maintained in DO storage. Full replay semantics as specified above. +- **OSS edge**: No replay buffer. Clients that reconnect receive `Error { code: "REPLAY_NOT_SUPPORTED", message: "Replay requires commercial deployment" }`. +- **OSS CLI**: No replay buffer currently. `RequestMissedUpdates` returns `Error { code: "REPLAY_NOT_SUPPORTED", message: "CLI replay buffer is a backlog item" }`. Clients should treat reconnect as a fresh connection. + +OSS CLI replay buffer is tracked as a backlog item. When implemented, it will maintain an in-memory buffer with configurable retention (default: 5 minutes). + +--- + ## Connection Lifecycle ### Successful Connection @@ -288,13 +382,16 @@ Client Server | | |<-- 101 Switching --------- | | | - |--- RequestMissedUpdates -> | (since last_timestamp) - |<-- ConflictUpdate -------- | (replay missed messages) + |--- RequestMissedUpdates -> | (since last_timestamp; see Message Type 7) + |<-- [replayed messages] --- | (all messages since last_timestamp, chronological) + |<-- SessionProgress ------- | (synthetic replay-complete marker) | | ``` **Reconnect Backoff**: 1s, 2s, 4s, 8s, 16s, 30s (max) +**Replay Protocol**: After reconnecting, clients SHOULD send `RequestMissedUpdates` with the Unix timestamp of the last message they received. On commercial edge (Durable Objects), the server replays all buffered messages newer than that timestamp; if the gap exceeds the 5-minute replay window, it returns `Error { code: "REPLAY_EXPIRED" }` and the client must perform a full re-sync. 
On OSS deployments (both CLI and edge), `RequestMissedUpdates` returns `Error { code: "REPLAY_NOT_SUPPORTED" }` and clients should treat the reconnect as a fresh connection. See Message Type 7 for full replay buffer limits and deployment-specific behavior. + --- ## Cloudflare Durable Objects Integration @@ -352,7 +449,7 @@ use worker::*; pub struct AnalysisSession { state: State, env: Env, - connections: HashMap, + connections: thread_utils::RapidMap, } #[durable_object] @@ -361,10 +458,10 @@ impl DurableObject for AnalysisSession { if req.headers().get("Upgrade")?.map(|v| v == "websocket").unwrap_or(false) { let pair = WebSocketPair::new()?; pair.server.accept()?; - + let session_id = uuid::Uuid::new_v4().to_string(); self.handle_websocket(session_id, pair.server).await?; - + Response::ok("")?.websocket(pair.client) } else { Response::error("Expected WebSocket", 400) @@ -379,8 +476,8 @@ impl DurableObject for AnalysisSession { ### Server-Sent Events (SSE) -**Endpoint**: `GET /sse/subscribe?repo_id={repository_id}` -**Use Case**: One-way serverβ†’client streaming, restrictive networks +**Endpoint**: `GET /sse/subscribe?repo_id={repository_id}` +**Use Case**: One-way serverβ†’client streaming, restrictive networks **Latency**: <100ms (same as WebSocket) **Format**: @@ -393,8 +490,8 @@ data: {"type": "SessionProgress", "files_processed": 1000, ...} ### Long-Polling -**Endpoint**: `GET /poll/updates?repo_id={repository_id}&since={timestamp}` -**Use Case**: Last resort for networks blocking WebSocket and SSE +**Endpoint**: `GET /poll/updates?repo_id={repository_id}&since={timestamp}` +**Use Case**: Last resort for networks blocking WebSocket and SSE **Latency**: 100-500ms (poll interval configurable) **Response**: @@ -412,7 +509,7 @@ data: {"type": "SessionProgress", "files_processed": 1000, ...} ## Security Considerations -1. **Authentication**: WebSocket connections require valid API token in `Authorization` header +1. 
**Authentication**: WebSocket connections require a valid API token in the `Authorization` header. **Exception**: CLI local-mode deployment (single-user, localhost-bound) does not require authentication per SC-AUTH-001. All authentication requirements here apply to service-mode and all edge deployments. 2. **Rate Limiting**: Max 1000 messages/second per connection 3. **Message Size**: Max 1MB per message 4. **Connection Limit**: Max 100 concurrent connections per repository @@ -433,6 +530,7 @@ data: {"type": "SessionProgress", "files_processed": 1000, ...} ## Performance Targets - **Connection Establishment**: <50ms (edge), <10ms (CLI) +- **Heartbeat Timeout**: 90 seconds default (3 Γ— 30s ping interval); configurable via server configuration - **Message Propagation**: <50ms (WebSocket), <100ms (SSE), 100-500ms (Polling) - **Heartbeat Overhead**: <100 bytes/minute per connection - **Binary vs JSON Size**: ~60% reduction (postcard vs JSON) diff --git a/specs/001-realtime-code-graph/data-model.md b/specs/001-realtime-code-graph/data-model.md index db574e2..5a44969 100644 --- a/specs/001-realtime-code-graph/data-model.md +++ b/specs/001-realtime-code-graph/data-model.md @@ -66,19 +66,36 @@ pub enum DependencyStrength { Strong, Weak } pub struct CodeRepository { pub id: RepositoryId, // Content-addressed hash of repo metadata pub source_type: SourceType, // Git, Local, S3, GitHub, GitLab - pub connection: ConnectionConfig, // Credentials, URL, auth tokens + pub connection_ref: CredentialRef, // Reference to credentials in secrets store (never the credential itself) pub sync_frequency: Duration, // How often to poll for changes pub last_sync: DateTime, // Last successful sync timestamp pub branch: String, // Primary branch to index (e.g., "main") pub file_patterns: Vec, // Glob patterns for files to index } +/// Describes WHERE and HOW to access a code source. 
+/// Credentials are never embedded here — they are always resolved at runtime
+/// via `CodeRepository.connection_ref` (a `CredentialRef` lookup into the secrets store).
 pub enum SourceType {
-    Git { url: String, credentials: Option },
-    Local { path: PathBuf },
-    S3 { bucket: String, prefix: String, credentials: S3Credentials },
-    GitHub { owner: String, repo: String, token: String },
-    GitLab { project: String, token: String },
+    Git { url: String },                    // Credentials: connection_ref → GitCredentials
+    Local { path: PathBuf },                // No credentials required
+    S3 { bucket: String, prefix: String },  // Credentials: connection_ref → S3Credentials
+    GitHub { owner: String, repo: String }, // Token: connection_ref → GitHub PAT or App token
+    GitLab { project: String },             // Token: connection_ref → GitLab PAT
+}
+
+/// Opaque reference to connection credentials stored in an external secrets manager.
+/// The credentials themselves are never persisted with the entity.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub struct CredentialRef {
+    pub store: CredentialStore, // Which secrets backend holds this credential
+    pub key: Box<str>,          // Lookup key within that store
+}
+
+pub enum CredentialStore {
+    EnvVar,           // CLI: environment variable name
+    SystemKeychain,   // CLI: OS keychain reference
+    CloudflareSecret, // Edge: Cloudflare Workers secret binding name
 }
 ```
@@ -95,6 +112,7 @@ pub enum SourceType {
 
 **Purpose**: Individual file in a repository with AST representation
 
 **Attributes**:
+
 ```rust
 pub struct CodeFile {
     pub id: FileId,                    // Content-addressed hash of file content
@@ -102,21 +120,33 @@ pub struct CodeFile {
     pub file_path: PathBuf,            // Relative path from repository root
     pub language: Language,            // Rust, TypeScript, Python, etc.
(from thread-language)
     pub content_hash: ContentHash,     // Blake3 hash of file content
-    pub ast: Root,                     // AST from thread-ast-engine
     pub last_modified: DateTime,       // File modification timestamp
     pub size_bytes: u64,               // File size for indexing metrics
+    // NOTE: `ast: Root` is intentionally ABSENT. tree-sitter's `Tree`/`Root` is an opaque
+    // C struct — not serializable, not persistable, and not Send. AST is obtained on demand
+    // via `tree_sitter_parse(source_bytes, language)`. For frequently-accessed files, use a
+    // separate AstCache (e.g., LRU thread_utils::RapidMap) owned by the analysis session,
+    // never stored in CodeFile itself.
 }
 
-pub type FileId = String;              // Format: "blake3:{hash}"
+/// Newtype wrapper for file identifiers. Prevents accidental substitution of
+/// other string-typed IDs. Format: `"blake3:{hash}"`.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
+pub struct FileId(Box<str>);
+impl FileId {
+    pub fn as_str(&self) -> &str { &self.0 }
+}
 pub type ContentHash = [u8; 32];       // Blake3 hash
 ```
 
 **Relationships**:
+
 - Many-to-one with `CodeRepository` (file belongs to one repository)
 - One-to-many with `GraphNode` (file contains multiple symbols)
 - Many-to-many with `ConflictPrediction` (file can have multiple conflicts)
 
 **Storage**:
+
 - Metadata: Postgres/D1 table `files`
 - AST: Content-addressed cache (ReCoco) with file hash as key
 - Content: Not stored (re-fetched from source on demand)
@@ -161,7 +191,29 @@ pub struct GraphNode {
     pub semantic_metadata: SemanticMetadata, // Language-specific analysis
 }
 
-pub type NodeId = String; // Format: "node:{content_hash}"
+/// `NodeId` is a content-addressed identifier for a code symbol.
+/// +/// **Hash composition** (Decision D6): +/// `blake3(file_path_bytes || qualified_name_bytes || normalized_signature_bytes)` +/// +/// - `file_path_bytes`: ensures uniqueness across files for identical function names +/// - `qualified_name_bytes`: captures renames (fn process β†’ fn handle_payment = new NodeId) +/// - `normalized_signature_bytes`: whitespace-stripped, formatting-invariant representation +/// +/// **Invariant**: Provenance/analysis metadata (timestamps, sources, confidence scores, +/// branch refs) are NOT included in the hash. Same symbol content = same NodeId, +/// regardless of when or how it was analyzed. +/// +/// **Property test required**: `hash(same_content) == hash(same_content)` always holds. +/// Any change to normalization logic MUST re-verify all 27 validated languages. +/// +/// Newtype wrapper for node identifiers. Prevents accidental substitution of +/// other string-typed IDs. Format: `"node:{blake3_hex}"`. +#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct NodeId(Box); +impl NodeId { + pub fn as_str(&self) -> &str { &self.0 } +} ``` > **NodeType (retired)**: Previously defined as @@ -179,20 +231,30 @@ pub struct SourceLocation { } pub struct SemanticMetadata { - pub visibility: Visibility, // Public, Private, Protected - pub mutability: Option, // Mutable, Immutable (Rust-specific) - pub async_fn: bool, // Is async function? - pub generic_params: Vec, // Generic type parameters - pub attributes: HashMap, // Language-specific attributes + pub visibility: Visibility, // Public, Private, Protected (language-agnostic) + pub generic_params: Vec, // Generic type parameters (language-agnostic) + pub attributes: thread_utils::RapidMap, serde_json::Value>, // Language-specific metadata. 
+ // Documented attribute keys: + // "mutability" β†’ bool β€” Rust: mutable binding or field + // "async" β†’ bool β€” Rust/JS/Python/Go: async function + // "unsafe" β†’ bool β€” Rust: unsafe fn or block + // "abstract" β†’ bool β€” Java/C#/Python: abstract method + // "static" β†’ bool β€” Java/C#/JS: static member + // "override" β†’ bool β€” Java/C#/Kotlin: overriding method + // "throws" β†’ [str] β€” Java: checked exception types + // "decorators" β†’ [str] β€” Python/TS: decorator names + // Keys follow snake_case convention. New keys may be added per language without schema migration. } ``` **Relationships**: + - Many-to-one with `CodeFile` (node belongs to one file) - Many-to-many with `GraphEdge` (node participates in many relationships) - One-to-many with `ConflictPrediction` (node can be source of conflicts) **Storage**: + - Metadata: Postgres/D1 table `nodes` - In-memory: Custom DependencyGraph (crates/flow/src/incremental/graph.rs) for complex queries (CLI only) β€” petgraph was evaluated but custom implementation was chosen - Edge Strategy: **Streaming/Iterator access only**. NEVER load full graph into memory. Use `D1GraphIterator` pattern. @@ -212,8 +274,10 @@ pub struct SemanticMetadata { > `semantic_class` is determined by the language-agnostic classifier. **Attributes**: + ```rust pub struct GraphEdge { + pub id: EdgeId, // Content-addressed edge identifier pub source_id: NodeId, // From node pub target_id: NodeId, // To node pub edge_type: EdgeType, // Relationship kind @@ -236,40 +300,81 @@ pub struct EdgeContext { pub conditional: bool, // Relationship is conditional (e.g., if statement) pub async_context: bool, // Relationship crosses async boundary } + +/// Content-addressed edge identifier. Derived from source_id + target_id + edge_type. +/// Format: `"edge:{blake3_hex}"`. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
+pub struct EdgeId(Box<str>);
+impl EdgeId {
+    pub fn as_str(&self) -> &str { &self.0 }
+}
 ```
 
 **Relationships**:
+
 - Many-to-one with `GraphNode` (edge connects two nodes)
 - Edges form the graph structure for traversal queries
 
 **Storage**:
-- Postgres/D1 table `edges` with composite primary key `(source_id, target_id, edge_type)`
+
+- Postgres/D1 table `edges` with `id` (`EdgeId`) as primary key; composite index `(source_id, target_id, edge_type)` for uniqueness
 - Indexed on `source_id` and `target_id` for fast traversal
 - In-memory: DependencyGraph adjacency lists (thread-flow) — custom BFS/topological sort
+- `GraphUpdate` WebSocket messages reference `EdgeId` values in `added_edges` and `removed_edges` fields
 
 ---
 
 ### Edge-Specific Optimizations (D1)
 
-To overcome D1's single-threaded nature and Workers' memory limits, we utilize a **Reachability Index**.
+To enable O(1) reachability lookups without recursive queries, we maintain a **Reachability Index**.
+
+#### Overlay Graph Query Default (FR-017)
+
+**Query default**: `include_local_delta` defaults to `true`. Queries without this parameter return the merged Base + Delta view. Pass `include_local_delta: false` to query the committed Base Layer only.
+
+#### Reachability Index — Dual Model (Decision D5, D8)
+
+The reachability index serves two complementary purposes:
+
+1. **Live session state** — tracks active thread instances and their current in-progress analysis;
+   stored in Container/Durable Object memory (ephemeral, authoritative for running sessions).
+2. **Committed baseline** — tracks last committed graph state per branch/ref;
+   stored in D1 (persistent, authoritative for offline and divergence analysis).
+
+**Goal**: understand how feature branches are converging or diverging.
Live data is the primary +source for real-time conflict queries; the committed baseline enables offline/divergence analysis +and comparison against a known good state. + +**k-Hop Bounded** (k=3 default, Decision D8): +NOT a full transitive closure. Full closure for 10M nodes β‰ˆ 800GB, which exceeds D1's 10GB limit. +Instead: -**Reachability Table (Transitive Closure)**: -Stores pre-computed "impact" paths to allow O(1) lookups for conflict detection without recursion. +- Pre-compute reachability up to **k=3 hops** from each changed node (configurable) +- Beyond k hops: **on-demand BFS** (streaming, does not materialize the full closure) +- Conflict detection queries beyond k hops use streaming BFS from the Container ```rust -// Table: reachability +// Table: reachability (D1 committed baseline β€” k-hop bounded) pub struct ReachabilityEntry { pub ancestor_id: NodeId, // Upstream node (e.g., modified function) pub descendant_id: NodeId, // Downstream node (e.g., affected API) - pub hops: u32, // Distance - pub path_hash: u64, // Hash of the path taken (for updates) + pub hops: u32, // Distance (≀ k, typically k=3) + pub path_hash: u64, // Hash of the path taken (for incremental updates) + pub branch_ref: String, // Git ref this baseline was computed from + pub computed_at: i64, // Unix timestamp of last computation (for staleness) } ``` **Reachability Logic**: -- **Write Path**: `ThreadBuildGraphFunction` computes transitive closure for changed nodes and performs `BATCH INSERT` into D1. -- **Read Path**: Conflict detection runs `SELECT descendant_id FROM reachability WHERE ancestor_id = ?` (single fast query). -- **Maintenance**: Incremental updates only recalculate reachability for the changed subgraph. + +- **Write Path**: `ThreadBuildGraphFunction` computes reachability up to k hops for changed nodes and performs `BATCH INSERT` into D1. +- **Read Path**: Queries run `SELECT descendant_id FROM reachability WHERE ancestor_id = ? 
AND hops <= ?` (O(1) index lookup within k-hop bound). +- **Beyond k hops**: Streaming BFS in Container; does NOT materialize the full transitive closure. +- **Maintenance**: Incremental updates only recalculate reachability for the changed subgraph (not the full graph). + +> **Note**: Conflict detection itself (consuming this index) is deferred to the commercial +> `thread-conflict` crate (Phase 4, commercial scope). The reachability index infrastructure +> (T034) is OSS and lives in `thread-storage`. --- @@ -277,7 +382,10 @@ pub struct ReachabilityEntry { **Purpose**: Represents a detected potential conflict between concurrent code changes +> **Type ownership**: These types are defined in `thread-api/src/types.rs` (OSS), not in `thread-conflict`. `thread-conflict` (commercial) imports them from `thread-api`. This ensures `thread-api` compiles independently of the commercial crate. + **Attributes**: + ```rust pub struct ConflictPrediction { pub id: ConflictId, // Unique conflict identifier @@ -292,8 +400,21 @@ pub struct ConflictPrediction { pub status: ConflictStatus, // Unresolved, Acknowledged, Resolved } -pub type ConflictId = String; // Format: "conflict:{hash}" -pub type UserId = String; +/// Newtype wrapper for conflict identifiers. Prevents accidental substitution of +/// other string-typed IDs. Format: `"conflict:{hash}"`. +#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct ConflictId(Box); +impl ConflictId { + pub fn as_str(&self) -> &str { &self.0 } +} +/// Newtype wrapper for user identifiers. Prevents accidental substitution of +/// other string-typed IDs. Value is the OAuth2/OIDC provider subject claim (`sub`). +/// Format: `"{provider}:{subject}"` (e.g., `"github:12345678"`). 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct UserId(Box); +impl UserId { + pub fn as_str(&self) -> &str { &self.0 } +} pub enum ConflictType { SignatureChange, // Function signature modified @@ -334,7 +455,7 @@ pub enum ConflictStatus { **Relationships**: - Many-to-many with `CodeFile` (conflict affects multiple files) - Many-to-many with `GraphNode` (conflict involves multiple symbols) -- One-to-one with `AnalysisSession` (conflict detected during specific analysis) +- Many-to-one with `AnalysisSession` (many conflicts can be detected during one analysis session) **Storage**: - Postgres/D1 table `conflicts` @@ -352,6 +473,7 @@ pub struct AnalysisSession { pub id: SessionId, // Unique session identifier pub repository_id: RepositoryId, // Repository being analyzed pub session_type: SessionType, // Full, Incremental, OnDemand + pub git_ref: Option, // Git ref (commit SHA or branch name) being analyzed; None for non-VCS sources pub start_time: DateTime, // Session start pub completion_time: Option>, // Session end (None if running) pub files_analyzed: u32, // Count of files processed @@ -363,7 +485,13 @@ pub struct AnalysisSession { pub metrics: PerformanceMetrics, // Performance statistics } -pub type SessionId = String; // Format: "session:{timestamp}:{hash}" +/// Newtype wrapper for analysis session identifiers. Prevents accidental substitution of +/// other string-typed IDs. Format: `"session:{timestamp}:{hash}"`. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct SessionId(Box); +impl SessionId { + pub fn as_str(&self) -> &str { &self.0 } +} pub enum SessionType { FullAnalysis, // Complete repository scan @@ -381,10 +509,12 @@ pub struct PerformanceMetrics { ``` **Relationships**: + - Many-to-one with `CodeRepository` (session analyzes one repository) - One-to-many with `ConflictPrediction` (session detects multiple conflicts) **Storage**: + - Postgres/D1 table `analysis_sessions` - Metrics aggregated for dashboard/reporting @@ -395,6 +525,7 @@ pub struct PerformanceMetrics { **Purpose**: Represents a pluggable analysis component (parser, graph builder, conflict detector) **Attributes**: + ```rust pub struct PluginEngine { pub id: EngineId, // Unique engine identifier @@ -405,7 +536,13 @@ pub struct PluginEngine { pub enabled: bool, // Is this engine active? } -pub type EngineId = String; // Format: "engine:{type}:{name}" +/// Newtype wrapper for plugin engine identifiers. Prevents accidental substitution of +/// other string-typed IDs. Format: `"engine:{type}:{name}"`. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +pub struct EngineId(Box); +impl EngineId { + pub fn as_str(&self) -> &str { &self.0 } +} pub enum EngineType { Parser { language: Language }, // AST parsing engine (thread-ast-engine) @@ -415,7 +552,7 @@ pub enum EngineType { } pub struct EngineConfig { - pub params: HashMap, // Key-value configuration + pub params: thread_utils::RapidMap, // Key-value configuration pub enabled_languages: Vec, // Languages this engine supports pub performance_tuning: PerformanceTuning, // Resource limits } @@ -428,18 +565,59 @@ pub struct PerformanceTuning { ``` **Relationships**: + - Many-to-many with `AnalysisSession` (session uses multiple engines) - Engines are swappable via trait boundaries (Constitution Principle IV) **Storage**: + - Postgres/D1 table `plugin_engines` - Configuration managed via admin API or config files --- -## Entity Relationships Diagram +### 8. Delta (Overlay Graph β€” Uncommitted Changes) + +**Purpose**: Represents a developer's local uncommitted changes layered on top of the committed Base Layer (FR-017). The query engine merges Base + Delta at runtime to produce the Unified View without modifying persistent Base storage. +> **OSS/Commercial boundary**: In OSS CLI, Deltas are stored in-memory per process (single-user, process lifetime). Multi-developer Delta sharing β€” required for cross-developer conflict detection (US2) β€” is a commercial capability implemented via Durable Object storage. + +**Attributes**: + +```rust +// NOTE: Delta implementation scope β€” +// OSS CLI: In-memory only, single-user. Not shared across processes or connections. +// Commercial edge: Durable Object memory, shared across developers in the same repository session. 
+pub struct Delta { + pub user_id: UserId, + pub repository_id: RepositoryId, + pub session_id: SessionId, + pub changed_nodes: thread_utils::RapidMap, // Modified or added nodes (local state) + pub removed_nodes: thread_utils::RapidSet, // Nodes deleted in local working state + pub added_edges: Vec, // New relationships in local working state + pub removed_edges: thread_utils::RapidSet, // Removed relationships in local working state + pub base_ref: String, // Git ref this delta was forked from (e.g., "main@abc123") + pub created_at: DateTime, + pub last_updated: DateTime, +} ``` + +**Relationships**: + +- Many-to-one with `CodeRepository` (Delta applies changes within one repository) +- Many-to-one with `AnalysisSession` (Delta is owned by one analysis session) + +**Storage**: + +- OSS CLI: In-memory only (process lifetime; discarded on exit) +- Commercial edge: Durable Object storage (session lifetime; shared across WebSocket connections from the same developer) +- NOT persisted to Postgres or D1 β€” Deltas are ephemeral by design (FR-017) + +--- + +## Entity Relationships Diagram + +```plaintext CodeRepository (1) ────< (many) CodeFile β”‚ β”‚ β”‚ └──> (many) GraphNode ──┐ @@ -453,6 +631,7 @@ AnalysisSession ───> ConflictPrediction GraphEdge β”€β”€β”€β”€β”˜ ## Content-Addressed Storage Strategy **ReCoco Integration**: + - All entities use content-addressed IDs (Blake3 hashes) - Content changes β†’ new ID β†’ automatic cache invalidation - Incremental updates: diff old vs new IDs, update only changed nodes/edges @@ -465,6 +644,7 @@ content hashes is the target state for `thread-graph`/`thread-storage`. **Cache Hit Rate Target**: >90% (SC-CACHE-001) **Example**: + ```rust // Function signature changes let old_id = NodeId::from_content("fn process(x: i32)"); // "node:abc123..." 
@@ -481,11 +661,13 @@ db.update_edges_referencing(&old_id, &new_id)?; ## Schema Migrations **Version 1** (Initial Schema): + - Tables: `repositories`, `files`, `nodes`, `edges`, `conflicts`, `analysis_sessions`, `plugin_engines` - Indexes: `idx_edges_source`, `idx_edges_target`, `idx_nodes_type_name`, `idx_nodes_file` - Schema version tracked in `schema_version` table **Future Migrations**: + - Version 2: Add materialized views for reverse dependencies - Version 3: Add partitioning for large-scale deployments (>10M nodes) - Version 4: Add audit logging for conflict resolutions @@ -506,6 +688,7 @@ db.update_edges_referencing(&old_id, &new_id)?; ## Next Steps (Phase 2 - tasks.md) Based on this data model: + 1. Implement Rust struct definitions in appropriate crates 2. Generate database migration SQL for Postgres and D1 3. Implement ReCoco content-addressing for all entities (foundation exists in thread-flow via Blake3 fingerprinting) diff --git a/specs/001-realtime-code-graph/deep-architectural-research.md b/specs/001-realtime-code-graph/deep-architectural-research.md index 6b16c3c..3c66549 100644 --- a/specs/001-realtime-code-graph/deep-architectural-research.md +++ b/specs/001-realtime-code-graph/deep-architectural-research.md @@ -7,8 +7,8 @@ SPDX-License-Identifier: AGPL-3.0-or-later # Real-Time Code Graph Intelligence: Deep Architectural Research -**Research Date:** January 11, 2026 -**Scope:** CocoIndex integration, tree-sitter capabilities, architectural patterns +**Research Date:** January 11, 2026 +**Scope:** CocoIndex integration, tree-sitter capabilities, architectural patterns **Status:** Comprehensive analysis complete, architectural recommendation provided --- @@ -24,7 +24,7 @@ This deep research validates the **FINAL ARCHITECTURAL DECISION** made on Januar 1. 
**Tree-Sitter Usage**: Same parser count, different purposes - CocoIndex: 27 parsers for language-aware text chunking (shallow) - Thread: 26 parsers for deep AST analysis and pattern matching (deep) - + 2. **Complementary Capabilities**: - CocoIndex: Dataflow orchestration, incremental processing, content-addressed caching, multi-target storage - Thread: AST pattern matching, symbol extraction, relationship tracking, YAML-based rule engine @@ -39,7 +39,7 @@ This deep research validates the **FINAL ARCHITECTURAL DECISION** made on Januar - Maintain dual concurrency: tokio (I/O) + rayon (CPU) - Preserve dependency swappability via abstraction -5. **Architectural Decision** (FINAL, January 10, 2026): +5. **Architectural Decision** (FINAL, January 10, 2026): - **Path B committed**: Services + CocoIndex Dataflow with Rust-native integration - **Path C bypassed**: No validation prototype phase - proceeding directly to implementation - **Implementation**: Following PATH_B_IMPLEMENTATION_GUIDE (3-week timeline) @@ -74,7 +74,7 @@ What CocoIndex does NOT provide: **Evidence**: > "CocoIndex uses tree-sitter for better chunking, not semantic analysis. Their 'code embedding' example is generic text chunking with language-aware splitting." -> +> > "Technical evidence: CocoIndex has 27 tree-sitter parsers as direct dependencies (not 166). Most languages fall back to regex-based splitting. Their chunking is sophisticated but shallowβ€”they parse to chunk better, not to understand code." 
**Built-in Function** (from CocoIndex docs): @@ -285,7 +285,7 @@ builder From `.phase0-planning/04-architectural-review-jan9/COCOINDEX_API_ANALYSIS.md`: -**Python API Coverage**: ~30-40% of Rust functionality +**Python API Coverage**: ~30-40% of Rust functionality **Rust-Only APIs**: Service layer (HTTP), execution contexts, setup/migration internals **Core Rust Modules**: @@ -325,7 +325,7 @@ pub trait SourceFactory { #[async_trait] pub trait SourceExecutor: Send + Sync { - async fn read(&self, options: SourceExecutorReadOptions) + async fn read(&self, options: SourceExecutorReadOptions) -> Result>; } @@ -337,7 +337,7 @@ pub trait SimpleFunctionFactory { #[async_trait] pub trait SimpleFunctionExecutor: Send + Sync { - async fn evaluate(&self, input: Vec) + async fn evaluate(&self, input: Vec) -> Result; fn enable_cache(&self) -> bool; fn timeout(&self) -> Option; @@ -448,11 +448,11 @@ impl SimpleFunctionExecutor for ThreadParseExecutor { // Use thread-ast-engine to parse source let source = input[0].as_string()?; let ast = self.language.ast_grep(source); - + // Extract symbols, relationships, etc. 
let symbols = extract_symbols(&ast); let relationships = extract_relationships(&ast); - + // Return as CocoIndex Value Ok(Value::Struct(StructType { fields: vec![ @@ -475,12 +475,12 @@ impl SimpleFunctionExecutor for ThreadParseExecutor { ### 3.3 Benefits of This Approach -βœ… **Dependency Inversion**: Thread owns the abstraction, CocoIndex is one implementation -βœ… **Swappability**: Can replace CocoIndex with alternative dataflow engine -βœ… **API Stability**: External API remains stable even if internal implementation changes -βœ… **CocoIndex Rust API**: Full access to powerful Rust capabilities, not just Python bindings -βœ… **Performance**: Direct Rust-to-Rust calls, no PyO3 overhead -βœ… **Type Safety**: Compile-time validation of data flow +βœ… **Dependency Inversion**: Thread owns the abstraction, CocoIndex is one implementation +βœ… **Swappability**: Can replace CocoIndex with alternative dataflow engine +βœ… **API Stability**: External API remains stable even if internal implementation changes +βœ… **CocoIndex Rust API**: Full access to powerful Rust capabilities, not just Python bindings +βœ… **Performance**: Direct Rust-to-Rust calls, no PyO3 overhead +βœ… **Type Safety**: Compile-time validation of data flow ### 3.4 Nuance Considerations @@ -517,13 +517,14 @@ impl SimpleFunctionExecutor for ThreadParseExecutor { **Location**: `crates/utils/src/hash_help.rs` **Implementation Details**: + ```rust //! Thread uses rapidhash::RapidInlineHashMap and rapidhash::RapidInlineHashSet -//! as stand-ins for std::collections::HashMap/HashSet, but using the +//! as stand-ins for std::collections::HashMap/HashSet, but using the //! RapidInlineHashBuilder hash builder. //! -//! Important: rapidhash is not a cryptographic hash, and while it's a high -//! quality hash that's optimal in most ways, it hasn't been thoroughly tested +//! Important: rapidhash is not a cryptographic hash, and while it's a high +//! 
quality hash that's optimal in most ways, it hasn't been thoroughly tested //! for HashDoS resistance. use rapidhash::RapidInlineBuildHasher; @@ -683,7 +684,7 @@ From January 9 analysis, five critical blocking issues: - **Question**: Can request/response model effectively wrap streaming semantics? - **Resolution**: Prototype both approaches -#### 2. Performance Validation +#### 2. Performance Validation - **Problem**: CocoIndex optimized for I/O-bound, Thread is CPU-bound - **Question**: Do we get claimed efficiency gains for CPU-intensive parsing? - **Resolution**: Benchmark real workloads (1000-file codebase, change 10 files) @@ -771,12 +772,12 @@ Risk: Any CocoIndex integration must meet ALL of these criteria: -βœ… **Performance**: Within 10% of pure Thread implementation (or demonstrably better) -βœ… **Type Safety**: Thread's metadata preserved through transformations without loss -βœ… **Extraction Path**: Clear abstraction boundary enabling CocoIndex removal if needed -βœ… **API Stability**: Service trait contracts remain stable and backward compatible -βœ… **Incremental Efficiency**: Demonstrably faster updates when only subset of files change -βœ… **Complexity Justified**: Added abstraction layers pay for themselves with concrete benefits +βœ… **Performance**: Within 10% of pure Thread implementation (or demonstrably better) +βœ… **Type Safety**: Thread's metadata preserved through transformations without loss +βœ… **Extraction Path**: Clear abstraction boundary enabling CocoIndex removal if needed +βœ… **API Stability**: Service trait contracts remain stable and backward compatible +βœ… **Incremental Efficiency**: Demonstrably faster updates when only subset of files change +βœ… **Complexity Justified**: Added abstraction layers pay for themselves with concrete benefits --- @@ -784,9 +785,9 @@ Any CocoIndex integration must meet ALL of these criteria: ### 6.1 Context for Decision -**Current Date**: January 11, 2026 -**Task**: Real-Time Code Graph 
Intelligence (feature 001) -**Prior Analysis**: January 9, 2026 services vs dataflow evaluation +**Current Date**: January 11, 2026 +**Task**: Real-Time Code Graph Intelligence (feature 001) +**Prior Analysis**: January 9, 2026 services vs dataflow evaluation **Key Requirements for Real-Time Graph**: - Multi-tier conflict detection (<100ms β†’ 1s β†’ 5s) @@ -1020,13 +1021,13 @@ impl SimpleFunctionFactory for ThreadParseFunction { impl SimpleFunctionExecutor for ThreadParseExecutor { async fn evaluate(&self, input: Vec) -> Result { let source = input[0].as_string()?; - + // Use thread-ast-engine let ast = self.language.ast_grep(source); - + // Extract basic metadata let node_count = ast.root().descendants().count(); - + // Return as CocoIndex Value Ok(Value::Struct(StructType { fields: vec![ @@ -1113,10 +1114,10 @@ assert!(speedup > 20.0 || cache_hit_rate > 0.8); impl SimpleFunctionExecutor for ThreadExtractSymbolsExecutor { async fn evaluate(&self, input: Vec) -> Result { let parsed_ast = input[0].as_struct()?; - + // Use thread-language to extract symbols let symbols = extract_all_symbols(&parsed_ast.ast); - + Ok(Value::Array(symbols.into_iter().map(|s| { Value::Struct(StructType { fields: vec![ @@ -1135,10 +1136,10 @@ impl SimpleFunctionExecutor for ThreadExtractSymbolsExecutor { impl SimpleFunctionExecutor for ThreadRuleMatchExecutor { async fn evaluate(&self, input: Vec) -> Result { let ast = input[0].as_struct()?; - + // Use thread-rule-engine let matches = self.rule_collection.match_ast(&ast); - + Ok(Value::Array(matches.into_iter().map(|m| { Value::Struct(StructType { fields: vec![ @@ -1183,7 +1184,7 @@ impl GraphQueryService for CocoIndexGraphService { // Trigger CocoIndex flow execution let flow_ctx = self.lib_ctx.get_flow_context(&self.flow_name)?; let result = flow_ctx.execute_query("dependencies", file).await?; - + // Convert CocoIndex Value to Thread types Ok(convert_to_dependencies(result)) } @@ -1238,7 +1239,7 @@ pub struct FileWatcherSource { } impl 
SourceExecutor for FileWatcherExecutor { - async fn read(&self, options: SourceExecutorReadOptions) + async fn read(&self, options: SourceExecutorReadOptions) -> Result> { // Watch file system for changes // Emit change events as CocoIndex rows @@ -1268,8 +1269,8 @@ builder timeout_ms: 5000, }) .export("conflicts", PostgresTarget { table: "conflicts" }) - .export("realtime_updates", WebSocketTarget { - durable_object: "ConflictSubscriptions" + .export("realtime_updates", WebSocketTarget { + durable_object: "ConflictSubscriptions" }); ``` @@ -1291,7 +1292,7 @@ pub fn edge_rapidhash(bytes: &[u8]) -> u64 { // Durable Objects for WebSocket management #[cfg(feature = "cloudflare-edge")] pub struct ConflictSubscriptionsDurableObject { - subscriptions: HashMap, + subscriptions: thread_utils::RapidMap, } ``` @@ -1302,27 +1303,32 @@ pub struct ConflictSubscriptionsDurableObject { ### 8.1 Summary of Findings **CocoIndex and Thread Integration**: + - βœ… **Complementary**, not overlapping - βœ… CocoIndex: Dataflow orchestration, incremental processing, caching - βœ… Thread: Deep AST analysis, pattern matching, rule engine - βœ… Integration via dual-layer architecture with dependency inversion **Tree-Sitter Capabilities**: + - βœ… CocoIndex: 27 parsers for shallow text chunking - βœ… Thread: 26 parsers for deep AST analysis - βœ… No overlap - different purposes (chunking vs understanding) **Rule Engine**: + - βœ… thread-rule-engine is UNIQUE to Thread - βœ… No CocoIndex equivalent - βœ… Differentiating capability **Rapidhasher**: + - βœ… Must use Thread's rapidhash for ALL caching - βœ… High-performance non-cryptographic hash - βœ… Integration strategy defined **Architectural Decision (FINAL - January 10, 2026)**: + - βœ… **Path B committed**: Services + CocoIndex Dataflow with Rust-native integration - βœ… **Path C bypassed**: No validation prototype phase - βœ… **Implementation**: Following PATH_B_IMPLEMENTATION_GUIDE (3-week timeline, January 13-31) @@ -1340,6 +1346,7 @@ 
This research **validates and supports** the FINAL DECISION made on January 10, 5. **Unique Thread Capabilities Preserved**: thread-rule-engine has no CocoIndex equivalent and becomes a differentiating custom operator **Research Confirms Decision Rationale**: + - βœ… Thread is a **service-first architecture** (long-lived, persistent, real-time) - βœ… CocoIndex provides essential infrastructure (incremental updates, caching, storage) - βœ… Thread provides unique intelligence (AST analysis, rules, semantic understanding) @@ -1348,6 +1355,7 @@ This research **validates and supports** the FINAL DECISION made on January 10, ### 8.3 Implementation Status and Next Steps **Current Status** (as of January 11, 2026): + - βœ… FINAL DECISION committed (January 10): Path B (Services + CocoIndex Dataflow) - βœ… PATH_B_IMPLEMENTATION_GUIDE created (3-week timeline: January 13-31) - βœ… Deep architectural research complete (validates decision) @@ -1356,11 +1364,13 @@ This research **validates and supports** the FINAL DECISION made on January 10, **Implementation Reference**: `.phase0-planning/04-architectural-review-jan9/PATH_B_IMPLEMENTATION_GUIDE.md` **Key Implementation Milestones**: + - **Week 1** (Jan 13-17): Foundation & Design - CocoIndex Rust API mastery, Thread operator design - **Week 2** (Jan 20-24): Core Integration - Thread operators as CocoIndex functions - **Week 3** (Jan 27-31): Service Layer - Service traits, storage targets, testing **For Real-Time Code Graph (Feature 001)**: + - Use PATH_B architecture as foundation - Implement real-time capabilities (WebSocket, progressive conflict detection) as additional layer - Follow dual-layer pattern: Service traits (external) + CocoIndex dataflow (internal) @@ -1370,8 +1380,9 @@ This research **validates and supports** the FINAL DECISION made on January 10, --- -**Document Status**: Research Complete - Validates Final Decision (Path B) -**References**: +**Document Status**: Research Complete - Validates Final Decision 
(Path B) +**References**: + - `.phase0-planning/04-architectural-review-jan9/2026-01-10-FINAL_DECISION_PATH_B.md` -- `.phase0-planning/04-architectural-review-jan9/PATH_B_IMPLEMENTATION_GUIDE.md` +- `.phase0-planning/04-architectural-review-jan9/PATH_B_IMPLEMENTATION_GUIDE.md` **Decision Authority**: FINAL (January 10, 2026) diff --git a/specs/001-realtime-code-graph/plan.md b/specs/001-realtime-code-graph/plan.md index 8c623c0..d76c93e 100644 --- a/specs/001-realtime-code-graph/plan.md +++ b/specs/001-realtime-code-graph/plan.md @@ -11,6 +11,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later **Input**: Feature specification from `specs/001-realtime-code-graph/spec.md` **Phase Status**: + - βœ… Phase 0: Research complete (8 research tasks documented in research.md) - βœ… Phase 1: Design artifacts complete (data-model.md, contracts/, quickstart.md) - βœ… Phase 2: Task generation complete (tasks.md β€” Phase 0.5 through Phase 7, 55+ tasks) @@ -20,6 +21,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later Real-Time Code Graph Intelligence transforms Thread from a code analysis library into a persistent intelligence platform. The system provides performant, codebase-wide graph analysis with semantic/AST awareness, enabling real-time dependency tracking, conflict prediction, and collaborative development support. 
**Primary Requirements**: + - Build and maintain live code graph with <1s query response for 100k files - Detect merge conflicts before commit with multi-tier progressive detection (100ms β†’ 1s β†’ 5s) - Support dual deployment (CLI + Cloudflare Edge) from single codebase @@ -27,21 +29,24 @@ Real-Time Code Graph Intelligence transforms Thread from a code analysis library - Enable incremental updates affecting <10% of full analysis time **Technical Approach**: + - Service-library dual architecture with ReCoco dataflow orchestration - Multi-backend storage (Postgres for CLI, D1 for edge, Vectorize for edge vector search) - Trait-based abstraction for ReCoco integration (prevent type leakage) -- Custom RPC over HTTP unified API protocol (CLI + edge, pending WASM compatibility research) -- Progressive conflict detection (AST diff β†’ semantic β†’ graph impact) +- **API Protocol**: prost + plain HTTP POST for external API (no Connect-RPC/gRPC framing); postcard for internal Rust-to-Rust (Workerβ†’Container, CLI); JSON-RPC 2.0 for MCP server (future, separate adapter). All proto files use `package thread.v1;` namespace. Directory: `crates/thread-api/proto/v1/`. Add `buf.gen.yaml` for TypeScript codegen targeting `@bufbuild/protobuf` (protobuf-es v2). Version bump policy: field additions are backward-compatible; breaking changes require `v2/` directory. 
+- Conflict detection deferred to commercial `thread-conflict` crate (Phase 4) - Rayon parallelism (CLI) + tokio async (edge) concurrency models -**Technical Context** +### Technical Context **Language/Version**: Rust (edition 2024, aligning with Thread's existing codebase) **Primary Dependencies**: + - ReCoco framework v0.2.1 (content-addressed caching, dataflow orchestration) - **INTEGRATED** in thread-flow crate via bridge pattern + ThreadFlowBuilder DSL - tree-sitter (AST parsing foundation, existing Thread dependency) - workers-rs (Cloudflare Workers runtime for edge deployment) -- serde + postcard (binary serialization for RPC, ~40% size reduction vs JSON) +- prost (Protobuf encoding for external API, no_std/WASM-compatible); prost-build (host-only code gen, never in WASM binary) +- serde + postcard (internal Rust-to-Rust binary serialization: Workerβ†’Container service bindings, CLI internal calls) - rayon (CPU-bound parallelism for CLI, existing) - tokio (async I/O for edge deployment, existing) - tokio-postgres + deadpool-postgres (Postgres client for CLI storage, used in thread-flow) @@ -49,26 +54,32 @@ Real-Time Code Graph Intelligence transforms Thread from a code analysis library - cloudflare-vectorize (edge vector search - replaces Qdrant for edge deployment) - ~~petgraph~~ - NOT USED: thread-flow implements custom BFS/topological sort in incremental/graph.rs (1,099 lines) -**Edge Constraint Strategy**: -- **Memory Wall**: Strict 128MB limit. **NO** loading full graph into memory. Use streaming/iterator patterns (`D1GraphIterator`). +**Edge Deployment Architecture**: + +- **Cloudflare Containers (ReCoco/thread-flow)**: Heavy computation β€” indexing, graph construction, incremental analysis β€” runs in Cloudflare Containers (beta). Full tokio/async support; no WASM constraints. Resolves WASM incompatibility with ReCoco (D2). 
+- **Workers (thin WASM layer)**: Handles request routing, D1 native queries, Vectorize semantic search, and result serialization. OSS: single Worker (Rust/Python/TypeScript). Commercial: Router Worker + per-language Language Workers via service bindings. +- **Memory Wall (Workers only)**: Strict 128MB limit. **NO** loading full graph into Worker memory. Use streaming/iterator patterns (`D1GraphIterator`). - **Database-First**: Primary graph state lives in D1. In-memory structs are ephemeral (batch processing only). -- **Reachability Index**: Maintain a pre-computed transitive closure table in D1 to enable O(1) conflict detection without recursive queries. +- **Reachability Index**: k-hop bounded (k=3 default) β€” NOT a full transitive closure (full closure for 10M nodes β‰ˆ 800GB, exceeds D1 10GB limit). Tracks live session state (Container/DO memory) + committed baseline (D1). On-demand BFS beyond k hops. - **Throughput Governance**: Use ReCoco adaptive controls (max_inflight_bytes) (<80MB) and `Adaptive Batching` to manage resource pressure. **Storage**: Multi-backend architecture with deployment-specific primaries: + - Postgres (CLI deployment primary - full graph with ACID guarantees) -- D1 (edge deployment primary - distributed graph storage + **Reachability Index**) +- D1 (edge deployment primary - distributed graph storage + **Reachability Index**). Two implementations: `D1IncrementalBackend` (existing, REST API β€” for external tooling/CI); `D1NativeBackend` (planned, `worker::D1Database` native binding for in-Worker use β€” zero extra HTTP hop, enables SC-STORE-001 <50ms p95 target). Both implement `StorageBackend` trait. 
- Vectorize (edge vector search), Qdrant (CLI-only, optional β€” currently blocked by ReCoco dependency conflict) **Testing**: cargo nextest (constitutional requirement, all tests executed via nextest) **Target Platform**: Dual deployment targets: + - Native binary (Linux, macOS, Windows) for CLI - WASM (Cloudflare Workers) for edge deployment **Project Type**: Service-library dual architecture (both library crates AND persistent service components) **Performance Goals**: + - Query response <1s for codebases up to 100k files (FR-005, SC-001) - Conflict detection latency: <100ms (initial AST diff), <1s (semantic analysis), <5s (comprehensive graph analysis) (FR-006) - Real-time update propagation: <100ms from code change detection to client notification (FR-013) @@ -76,12 +87,14 @@ Real-Time Code Graph Intelligence transforms Thread from a code analysis library - Incremental update: <10% of full analysis time for changes affecting <5% of files (SC-INCR-002) **Constraints**: + - WASM bundle size: <10MB compressed for fast cold-start (SC-EDGE-003) - Storage latency targets (p95): Postgres <10ms, D1 <50ms, Vectorize (edge vectors) <100ms (SC-STORE-001) - Edge deployment global latency: <50ms p95 from any major city (commercial) (SC-EDGE-004) - Memory: Sublinear storage growth through deduplication, max 1.5x raw code size (SC-STORE-004) **Scale/Scope**: + - Initial target: 500k files, 10M graph nodes (expandable with infrastructure) - Concurrent users: 1000 simultaneous queries with <2s p95 response (SC-004) - Edge throughput: 10k requests/sec per geographic region (commercial) (SC-EDGE-005) @@ -184,7 +197,7 @@ The `thread-flow` crate (already in workspace) provides foundational infrastruct several planned crates. 
New crates should build on β€” not duplicate β€” this foundation: | thread-flow component | Provides for planned crate | -|---|---| +| --- | --- | | `incremental/graph.rs` (1,099 lines) | Core of `thread-graph` β€” BFS, topological sort, cycle detection | | `incremental/analyzer.rs` (636 lines) | Core of `thread-indexer` β€” incremental analysis coordinator | | `incremental/storage.rs` + backends | Core of `thread-storage` β€” StorageBackend trait, Postgres, D1 | @@ -198,7 +211,7 @@ several planned crates. New crates should build on β€” not duplicate β€” this fo The `thread-definitions` classification engine enables broad language coverage without per-language engineering work: | Coverage | Mechanism | Languages | -|----------|-----------|----------| +| ---------- | ----------- | ---------- | | 80%+ baseline | token_purpose + universal_exact rules (2,444 cross-language patterns) | Any tree-sitter grammar | | ~100% full | + TOML overrides (~10-50 lines/language) | All 27 currently validated | | Potential | tree-sitter-language-pack grammars + TOML | ~166 languages | @@ -207,6 +220,8 @@ This reframes FR-010 (multi-language support on Cloudflare): broad coverage is a **File-extension language identification**: CodeWeaver has ~200 language extension mappings. Porting these to `data/file_extensions.json` in thread-definitions would provide language detection for the full tree-sitter-language-pack without expanding the SupportLang enum. +> **Decision D-API-GRAPH**: The `thread-graph` crate's public API surface (all `pub use` exports, public trait signatures, and stable function signatures) MUST be documented in `specs/001-realtime-code-graph/contracts/` as `thread-graph-api.md` BEFORE contract tests (`T009`, `T027`) are written. This is an explicit gate: contract tests have nothing to verify against until the API surface is declared. The API surface document is the ground truth for contract tests. 
Emerges from TDD β€” the API is not pre-designed top-down but MUST be formally recorded once it stabilizes from test-driven discovery. + ```text crates/ β”œβ”€β”€ thread-graph/ # PARTIAL: Extend thread-flow/src/incremental/graph.rs β€” do NOT reimplement @@ -340,13 +355,15 @@ tests/ ``` **Dependency Graph** (acyclic, library-service separated): -``` + +```plaintext Service Layer (orchestration, persistence): thread-services (ReCoco traits) β”œβ”€> thread-storage (Postgres/D1/Vectorize) β”œβ”€> thread-realtime (WebSocket/SSE) └─> thread-api (Custom RPC over HTTP) - └─> thread-conflict (multi-tier detection) + # NOTE: thread-conflict (commercial) β†’ thread-api (not the reverse) + # Commercial crate imports conflict protocol types from thread-api; thread-api never depends on thread-conflict thread-flow (ReCoco integration layer - FOUNDATIONAL) β”œβ”€> recoco v0.2.1 (public crate) @@ -380,23 +397,33 @@ Edge Deployment: ``` **Structure Decision**: + - **Single Workspace Extension**: New graph-focused crates added to existing Thread workspace -- **Library-Service Boundary**: Clear separation (graph/indexer/conflict are library-reusable, storage/api/realtime are service-specific) -- **ReCoco Integration**: IMPLEMENTED via bridge.rs + ThreadFlowBuilder DSL in thread-flow. Bridge pattern + feature gating prevents type leakage. 
+- **Library-Service Boundary**: Clear separation (graph/indexer are library-reusable; storage/api/realtime are service-specific; thread-conflict is commercial/deferred) +- **ReCoco Integration**: SCAFFOLDED via bridge.rs + ThreadFlowBuilder DSL in thread-flow (bridge.rs = stubs only, must be implemented before T-C10) - **Acyclic Dependencies**: Top-down flow from services β†’ libraries, no circular references - **Component Selection**: Existing ast-grep components (ast-engine, language) reused, CodeWeaver evaluation deferred to Phase 2 (Research Task 2) +**Crate Ownership Boundary (D3)**: + +- `thread-services` = engine-agnostic orchestration traits ONLY (`DataSource`, `DataFunction`, `DataTarget`). ReCoco types NEVER appear in `thread-services` public API. +- `thread-flow` = the ReCoco implementation. Owns `bridge.rs`, `ThreadFlowBuilder`, storage backends, and the semantic query transform (bridge between `thread-ast-engine` and `thread-definitions`). +- All new crates (`thread-graph`, `thread-indexer`, etc.) depend on `thread-services` traits, NOT `thread-flow` directly. This prevents circular dependencies (thread-flow depends on these crates while also being their implementation) and preserves engine swappability. +- `thread-conflict` is Commercial/Deferred β€” Phase 4 tasks are out of OSS scope (see D4 decision). +- `thread-api/types.rs` owns shared conflict protocol types (`ConflictPrediction`, `ConflictType`, `Severity`, `DetectionTier`, `ConflictStatus`, `ResolutionStrategy`). `thread-conflict` (commercial) imports these from `thread-api` β€” it does not define them. 
+ ## Complexity Tracking > **Fill ONLY if Constitution Check has violations that must be justified** | Violation | Why Needed | Simpler Alternative Rejected Because | -|-----------|------------|-------------------------------------| +| ----------- | ------------ | ------------------------------------- | | [e.g., 4th project] | [current need] | [why 3 projects insufficient] | | [e.g., Repository pattern] | [specific problem] | [why direct DB access insufficient] | **Phase 0.5 β€” Semantic Classification** (parallel workstream, prerequisite for T011) -``` + +```plaintext thread-definitions crate (parallel, prerequisite for T011): T-C01: Create thread-definitions crate skeleton T-C02: Implement types.rs (SemanticClass, ImportanceRank, TokenPurpose, etc.) @@ -413,7 +440,8 @@ thread-definitions crate (parallel, prerequisite for T011): ``` **Phase 1: Core Integration** (3 weeks, conditional on Phase 0 pass) -``` + +```plaintext Goal: Implement full Thread operator suite and storage backends Tasks: diff --git a/specs/001-realtime-code-graph/quickstart.md b/specs/001-realtime-code-graph/quickstart.md index ead6384..492c4ac 100644 --- a/specs/001-realtime-code-graph/quickstart.md +++ b/specs/001-realtime-code-graph/quickstart.md @@ -7,13 +7,14 @@ SPDX-License-Identifier: AGPL-3.0-or-later # Quickstart Guide: Real-Time Code Graph Intelligence -**Feature**: Real-Time Code Graph Intelligence -**Status**: Development +**Feature**: Real-Time Code Graph Intelligence +**Status**: Development **Target Audience**: Developers using Thread for code analysis ## Overview Thread's Real-Time Code Graph Intelligence provides: + - **Real-time dependency tracking** for codebases up to 500k files - **Conflict prediction** before code merge (95% accuracy, <10% false positives) - **Incremental analysis** (<10% of full scan time for typical changes) @@ -24,16 +25,19 @@ Thread's Real-Time Code Graph Intelligence provides: ### CLI Deployment (Local Development) **Prerequisites**: + - Rust 1.75+ 
(edition 2021) - Postgres 14+ (for persistent caching) - 8GB RAM minimum (16GB recommended for large codebases) **Install via cargo**: + ```bash cargo install thread-cli --features graph-intelligence ``` **Or build from source**: + ```bash git clone https://github.com/thread/thread.git cd thread @@ -44,10 +48,12 @@ cargo build --release --workspace ### Edge Deployment (Cloudflare Workers) **Prerequisites**: + - Cloudflare Workers account (paid plan for 10MB WASM limit) - Wrangler CLI installed (`npm install -g wrangler`) **Deploy to Cloudflare**: + ```bash # Build WASM binary mise run build-wasm-release @@ -60,6 +66,7 @@ wrangler tail ``` **Environment Variables**: + ```toml # wrangler.toml name = "thread-intelligence" @@ -82,7 +89,8 @@ thread init --repository /path/to/your/code --languages rust,typescript,python ``` **Output**: -``` + +```plaintext βœ“ Initialized Thread repository: repo:abc123 βœ“ Detected 1,234 files (Rust: 800, TypeScript: 300, Python: 134) βœ“ Created Postgres database: thread_repo_abc123 @@ -99,12 +107,14 @@ thread status --session ``` **Expected Time**: + - Small (<1k files): 10-30 seconds - Medium (1k-10k files): 1-5 minutes - Large (10k-100k files): 5-30 minutes **Output**: -``` + +```plaintext Analyzing repository repo:abc123... [=============> ] 54% (670/1234 files) Nodes created: 8,450 @@ -128,6 +138,7 @@ thread query --node "processPayment" --query-type dependencies --depth 2 ``` **Sample Output**: + ```json { "nodes": [ @@ -150,7 +161,8 @@ thread search --code "fn validate_input(user: &User) -> Result<(), Error>" --top ``` **Output**: -``` + +```plaintext Top 5 similar functions: 1. [0.92] validateUser (src/auth.rs:45) 2. 
[0.87] checkUserPermissions (src/permissions.rs:102) @@ -172,7 +184,8 @@ thread conflicts --compare main --files src/payment.rs --tiers 1,2,3 ``` **Progressive Output**: -``` + +```plaintext Tier 1 (AST Diff) - 95ms: ⚠ Potential conflict: Function signature changed Confidence: 0.6 @@ -199,7 +212,8 @@ thread watch --repository repo:abc123 ``` **Real-Time Feed**: -``` + +```plaintext [12:00:05] Code change detected: src/payment.rs [12:00:05] Conflict detected (Tier 1): SignatureChange (confidence: 0.6) [12:00:06] Conflict updated (Tier 2): BreakingAPIChange (confidence: 0.9) @@ -292,11 +306,13 @@ jobs: ### Issue: Slow Analysis (>5 minutes for 10k files) **Diagnosis**: + ```bash thread metrics --session --verbose ``` **Solutions**: + - Increase `parallel_workers` in `thread.toml` - Check Postgres connection (should be <10ms p95 latency) - Verify cache hit rate (>90% expected after first run) @@ -304,12 +320,14 @@ thread metrics --session --verbose ### Issue: High Memory Usage **Diagnosis**: + ```bash # Monitor memory during analysis thread analyze --repository repo:abc123 --profile-memory ``` **Solutions**: + - Reduce `parallel_workers` (trade speed for memory) - Increase `max_file_size_mb` to skip large files - Use incremental analysis instead of full scans @@ -317,11 +335,13 @@ thread analyze --repository repo:abc123 --profile-memory ### Issue: WebSocket Disconnections **Diagnosis**: + ```bash thread watch --repository repo:abc123 --debug ``` **Solutions**: + - Check network stability (WebSocket requires persistent connection) - Enable SSE fallback: `thread watch --fallback sse` - Enable polling fallback: `thread watch --fallback polling` @@ -335,9 +355,9 @@ thread watch --repository repo:abc123 --debug ## Support -- **Documentation**: https://thread.dev/docs/real-time-intelligence -- **GitHub Issues**: https://github.com/thread/thread/issues -- **Community Discord**: https://discord.gg/thread +- **Documentation**: +- **GitHub Issues**: +- **Community Discord**: --- 
diff --git a/specs/001-realtime-code-graph/research.md b/specs/001-realtime-code-graph/research.md index 290de07..fa7b34e 100644 --- a/specs/001-realtime-code-graph/research.md +++ b/specs/001-realtime-code-graph/research.md @@ -47,6 +47,7 @@ open-source crate. It IS the native Rust API. The integration is complete. - `thread_calls`: AST β†’ function call relationships 4. **Feature Gating** (type leakage prevention): + ```toml recoco-minimal = ["recoco/source-local-file"] # Default recoco-postgres = ["recoco-minimal", "recoco/target-postgres"] @@ -57,12 +58,14 @@ open-source crate. It IS the native Rust API. The integration is complete. can be implemented directly without waiting on external maintainers. **Validation Criteria** (all met): + - βœ… Zero ReCoco types in Thread public APIs - βœ… All dataflow operations testable without external Python dependencies - βœ… `cargo build --workspace` succeeds - βœ… `thread-flow` compiles to WASM for edge deployment **Original alternatives** (still accurately rejected): + - ❌ Direct Python Subprocess Integration: High overhead - ❌ PyO3 Embed Python Interpreter: Massive binary size, edge incompatible - ❌ Wait for CocoIndex Rust API: We built our own (ReCoco) @@ -78,6 +81,7 @@ open-source crate. It IS the native Rust API. The integration is complete. **Decision**: Use Existing Thread Components (ast-grep-derived) with Potential CodeWeaver Integration for Semantic Layer **Rationale**: + 1. **Existing Thread Infrastructure**: Thread already has `thread-ast-engine`, `thread-language`, `thread-rule-engine` vendored from ast-grep, tested and integrated. These provide solid AST parsing foundation. 2. **CodeWeaver Evaluation**: CodeWeaver is sister project (currently Python) with sophisticated semantic characterization layer. Spec mentions it as "optional integration" pending Rust portability assessment. @@ -87,6 +91,7 @@ open-source crate. It IS the native Rust API. The integration is complete. 4. 
**Alignment with Spec**: Spec Dependency 3 states "Existing Thread crates NOT guaranteed to be used" but provides "evaluation priority" guidance. CocoIndex evaluation comes first, then determine semantic layer needs. **Alternatives Considered**: + - βœ… **Use Existing ast-grep Components**: Proven, integrated, supports 20+ languages (Tier 1-3 from CLAUDE.md), fast AST parsing - ⚠️ **Port CodeWeaver to Rust**: High effort, unknown timeline, Pythonβ†’Rust portability unproven, defer until semantic analysis requirements are clearer - ❌ **Build Custom Semantic Layer**: Reinventing wheel, violates "don't rebuild what exists" principle @@ -94,16 +99,19 @@ open-source crate. It IS the native Rust API. The integration is complete. **Migration Plan**: **Phase 1 (MVP)**: Existing ast-grep components + - Use `thread-ast-engine` for AST parsing - Use `thread-language` for multi-language support - Use `thread-rule-engine` for pattern-based conflict detection (Tier 1: AST diff) **Phase 2 (Semantic Enhancement)**: Evaluate CodeWeaver integration + - Assess CodeWeaver's semantic characterization capabilities - Determine Rust portability (Pythonβ†’Rust) - If viable, integrate for Tier 2 semantic analysis (conflict detection accuracy refinement) **Phase 3 (Production Optimization)**: Refine based on metrics + - If CodeWeaver proves superior for semantic analysis, expand integration - If ast-grep components sufficient, optimize existing implementation - Decision driven by conflict detection accuracy metrics (95% target, <10% false positive from SC-002) @@ -127,11 +135,13 @@ gRPC via tonic is NOT viable for Cloudflare Workers due to fundamental platform 3. 
**Bundle Size Concerns**: tonic + dependencies would yield 5-10MB uncompressed, approaching the 10MB Worker limit before adding application logic Instead, leverage Cloudflare Workers' actual capabilities: + - **HTTP Fetch API**: Request/response via workers-rs - **WebSockets**: Real-time bidirectional streaming (supported natively) - **Shared Rust Types**: Compile-time type safety without gRPC overhead **Alternatives Considered**: + - ❌ **tonic (gRPC)**: Does NOT compile to WASM server-side, Workers platform incompatible, 5-10MB bundle size - ❌ **grpc-web**: Client-side only (tonic-web-wasm-client), still requires HTTP/2 backend, doesn't solve server-side WASM problem - ⚠️ **tarpc / Cap'n Proto**: No confirmed WASM compatibility, unclear Workers support, unproven for this use case @@ -142,6 +152,7 @@ Instead, leverage Cloudflare Workers' actual capabilities: **WASM Compatibility**: **Cloudflare Workers Platform Constraints:** + - **Target**: `wasm32-unknown-unknown` (NOT `wasm32-wasi`) - **Runtime**: V8 isolates, no TCP sockets, Fetch API only - **Bundle Limits**: Free tier 1MB compressed, Paid tier 10MB compressed @@ -149,6 +160,7 @@ Instead, leverage Cloudflare Workers' actual capabilities: - **Concurrency**: Single-threaded (no `tokio::spawn` for multi-threading) **Confirmed Working Pattern**: + ```rust use worker::*; @@ -172,6 +184,7 @@ app.get("/ws", |req, ctx| async move { ``` **Bundle Size Analysis (Edge Deployment)**: + - workers-rs runtime: 800KB β†’ 250KB compressed - serde + postcard: 200KB β†’ 60KB compressed - thread-ast-engine (minimal): 1.5MB β†’ 500KB compressed @@ -180,6 +193,7 @@ app.get("/ws", |req, ctx| async move { - **Total: ~3.8MB uncompressed β†’ ~1.3MB compressed** (with wasm-opt -Oz: ~900KB) **Performance Characteristics**: + - Cold Start: <50ms (Workers V8 isolate initialization) - RPC Latency: Local (same edge) <10ms, Cross-region 50-100ms - Serialization: postcard ~0.5ms, JSON ~1.2ms (2.4x slower) @@ -188,11 +202,13 @@ app.get("/ws", 
|req, ctx| async move { **Fallback Strategy**: If Custom RPC Development Proves Complex: + 1. **Phase 1**: Simple HTTP REST with JSON (fastest to implement, ~2MB optimized) 2. **Phase 2**: Add binary serialization (switch to postcard for 40% size reduction) 3. **Phase 3**: Add WebSocket streaming (real-time updates, polling fallback) For CLI Deployment (No WASM Constraints): + - Can freely use tonic/gRPC if desired - Or use same HTTP-based protocol for consistency - Shared trait ensures behavioral equivalence @@ -212,17 +228,20 @@ Use Postgres/D1 for persistent graph storage with adjacency list schema, combine **Rationale**: Why NOT Dedicated Graph Databases: + - **Memgraph/Neo4j**: Require separate infrastructure incompatible with Thread's dual deployment model (Postgres CLI + D1 Edge). Memgraph is 100x+ faster than Neo4j but only works as standalone system. - **SurrealDB**: Emerging technology, mixed performance reports, doesn't support both backends. - **Infrastructure Complexity**: Adding separate graph DB violates Thread's service-library architecture (Constitution Principle I). Why Hybrid Relational Works: + 1. **Dual Backend Support**: Single schema works across Postgres (CLI) and D1 (Edge) with no architectural changes. 2. **Content-Addressed Caching**: Achieves >90% cache hit rate requirement (Constitution Principle VI) through ReCoco integration. 3. **Performance Tiering**: Simple queries (1-2 hops) use indexed SQL; complex queries (3+ hops) load subgraphs into in-memory structures for traversal. 4. **Incremental Updates**: ReCoco dataflow triggers only affected subgraph re-analysis on code changes (Constitution Principle IV). 
**Alternatives Considered**: + - ❌ **Pure Postgres Recursive CTEs**: Performance degrades exponentially with depth and fan-out, string-based path tracking inefficient, D1's SQLite foundation limits concurrent writes - ❌ **Materialized Paths**: Good for hierarchical queries but inefficient for non-hierarchical graphs (code has circular dependencies), update overhead - ❌ **Neo4j/Memgraph**: Performance superior (Memgraph 114-132x faster than Neo4j, 400ms for 100k nodes) but cannot support dual Postgres/D1 deployment, requires separate infrastructure @@ -232,6 +251,7 @@ Why Hybrid Relational Works: **Query Patterns**: **Schema Design**: + ```sql CREATE TABLE nodes ( id TEXT PRIMARY KEY, -- Content-addressed hash @@ -259,6 +279,7 @@ CREATE INDEX idx_nodes_type_name ON nodes(type, name); ``` **Query Routing Strategy**: + - **1-2 Hop Queries**: Direct SQL with indexed lookups (<10ms Postgres, <50ms D1) - **3+ Hop Queries**: Load subgraph into custom `DependencyGraph`, execute in-memory algorithms, cache result - **Reverse Dependencies**: Materialized views for "who depends on me" hot queries @@ -266,21 +287,25 @@ CREATE INDEX idx_nodes_type_name ON nodes(type, name); **Scalability Analysis**: **Storage Requirements (10M nodes, 50M edges)**: + - Postgres: Nodes 5GB + Edges 5GB + Indexes 5GB = ~15GB total (fits comfortably) - D1: Same schema, distributed across CDN nodes, ReCoco caching reduces query load by >90% **Performance Projections**: + - **Postgres (CLI)**: 1-hop <2ms p95, 2-hop <10ms p95 βœ…, 3+ hop <50ms p95 (10ms load + 1ms traversal) - **D1 (Edge)**: Cached queries <5ms p95, 1-hop <20ms p95, 2-hop <50ms p95 βœ… - **Content-Addressed Cache Hit Rate**: >90% projected βœ… (constitutional requirement) **Implementation Notes**: + - Custom BFS/topological sort implementation (see `crates/flow/src/incremental/graph.rs`, 1,099 lines) β€” petgraph evaluated but custom implementation chosen for better integration with incremental update semantics - Implement incremental 
graph updates via ReCoco diff tracking - Composite indexes on `(source_id, edge_type)` and `(target_id, edge_type)` - Materialized views for hot reverse dependency queries **Actual Implementation** (2026-02-24): `crates/flow/src/incremental/graph.rs` provides: + - Custom `DependencyGraph` with bidirectional adjacency lists - BFS affected-file detection (O(V+E)) - Topological sort for correct reanalysis order @@ -309,6 +334,7 @@ CREATE INDEX idx_nodes_type_name ON nodes(type, name); 4. **Polling Graceful Degradation**: Long-polling fallback for networks that block WebSocket and SSE. Higher latency but ensures universal compatibility. **Alternatives Considered**: + - ❌ **gRPC Server-Side Streaming**: Not supported by Cloudflare Workers runtime (confirmed in API Protocol research) - βœ… **WebSocket (Primary)**: Native Workers support, bidirectional, <50ms global latency, works for progressive conflict detection - βœ… **Server-Sent Events (Fallback)**: HTTP/1.1 compatible, restrictive network friendly, one-way sufficient for many use cases @@ -317,12 +343,14 @@ CREATE INDEX idx_nodes_type_name ON nodes(type, name); **Durable Objects Usage**: Cloudflare Durable Objects enable stateful edge operations: + - **Connection Management**: Track active WebSocket connections per user/project - **Session State**: Maintain user analysis sessions across requests - **Collaborative State**: Coordinate multi-user conflict detection and resolution - **Real-Time Coordination**: Propagate code changes to all connected clients within 100ms **Implementation Pattern**: + ```rust // Durable Object for session management #[durable_object] @@ -352,6 +380,7 @@ impl DurableObject for AnalysisSession { **Progressive Conflict Detection Streaming**: Multi-tier results update clients in real-time: + 1. **Tier 1 (AST diff)**: <100ms β†’ WebSocket message β†’ Client shows initial conflict prediction 2. 
**Tier 2 (Semantic)**: <1s β†’ WebSocket update β†’ Client refines conflict details with accuracy score 3. **Tier 3 (Graph impact)**: <5s β†’ WebSocket final update β†’ Client shows comprehensive analysis with severity ratings @@ -371,18 +400,19 @@ pub async fn connect_realtime(server: &str) -> Result { if let Ok(ws) = connect_websocket(server).await { return Ok(RealtimeClient::WebSocket(ws)); } - + // Fallback to SSE if let Ok(sse) = connect_sse(server).await { return Ok(RealtimeClient::SSE(sse)); } - + // Last resort: polling Ok(RealtimeClient::LongPolling(connect_polling(server).await?)) } ``` **Performance Characteristics**: + - WebSocket: <50ms global propagation, <10ms same-edge - SSE: <100ms propagation, <20ms same-edge - Long-Polling: 100-500ms latency (poll interval configurable) @@ -410,16 +440,19 @@ pub async fn connect_realtime(server: &str) -> Result { **Crate Responsibilities**: **NEW Library Crates** (reusable, WASM-compatible): + - `thread-graph`: Core graph data structures, traversal algorithms, pathfinding (depends on: thread-utils) - `thread-indexer`: Multi-source code indexing, file watching, change detection (depends on: thread-ast-engine, thread-language) - `thread-conflict`: Conflict detection engine (multi-tier: AST diff, semantic, graph) (depends on: thread-graph, thread-ast-engine) **NEW Service Crates** (persistence, orchestration): + - `thread-storage`: Multi-backend storage abstraction (Postgres/D1/Qdrant traits) (depends on: thread-graph) - `thread-api`: RPC protocol (HTTP+WebSocket), request/response types (depends on: thread-graph, thread-conflict) - `thread-realtime`: Real-time update propagation, WebSocket/SSE handling, Durable Objects integration (depends on: thread-api) **EXISTING Crates** (extended/reused): + - `thread-services`: **EXTENDED** - Add ReCoco dataflow traits, registry, YAML spec parser (depends on: all new crates) - `thread-ast-engine`: **REUSED** - AST parsing foundation (no changes) - `thread-language`: **REUSED** 
- Language support (no changes) @@ -428,7 +461,8 @@ pub async fn connect_realtime(server: &str) -> Result { - `thread-wasm`: **EXTENDED** - Add edge deployment features for new crates (depends on: thread-api, thread-realtime) **Dependency Graph**: -``` + +```plaintext β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ thread-services β”‚ (Service orchestration, ReCoco) β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ @@ -462,6 +496,7 @@ pub async fn connect_realtime(server: &str) -> Result { **Library-Service Split**: **Library Crates** (embeddable, reusable): + - thread-graph - thread-indexer - thread-conflict @@ -470,6 +505,7 @@ pub async fn connect_realtime(server: &str) -> Result { - thread-utils (existing) **Service Crates** (deployment-specific): + - thread-services (orchestration) - thread-storage (persistence) - thread-api (network protocol) @@ -497,11 +533,12 @@ pub async fn connect_realtime(server: &str) -> Result { **Tier 1 (AST Diff)**: <100ms for initial detection **Algorithm**: Git-style tree diff on AST structure + ```rust pub fn ast_diff(old_ast: &Root, new_ast: &Root) -> Vec { let old_symbols = extract_symbols(old_ast); // Functions, classes, etc. let new_symbols = extract_symbols(new_ast); - + let mut conflicts = Vec::new(); for (name, old_node) in old_symbols { if let Some(new_node) = new_symbols.get(&name) { @@ -528,7 +565,8 @@ pub fn ast_diff(old_ast: &Root, new_ast: &Root) -> Vec { } ``` -**Data Structures**: +**Data Structures**: + - Hash-based symbol tables for O(n) diff - Structural hashing for subtree comparison (similar to Git's tree objects) - Content-addressed AST nodes for efficient comparison @@ -536,10 +574,11 @@ pub fn ast_diff(old_ast: &Root, new_ast: &Root) -> Vec { **Tier 2 (Semantic Analysis)**: <1s for accuracy refinement **Techniques**: + 1. 
**Type Inference**: Resolve type signatures to detect breaking changes - Example: `fn process(x)` β†’ `fn process(x: i32)` may or may not break callers - Infer types of call sites to determine if change is compatible - + 2. **Control Flow Analysis**: Detect behavioral changes - Example: Adding early return changes execution paths - Compare control flow graphs (CFG) to identify semantic shifts @@ -548,30 +587,32 @@ pub fn ast_diff(old_ast: &Root, new_ast: &Root) -> Vec { - Example: Changing variable assignment order may affect results - Use reaching definitions and use-def chains -**Integration Point**: +**Integration Point**: + - If using CodeWeaver (from Research Task 2), leverage its semantic characterization layer - Otherwise, implement minimal semantic analysis using thread-ast-engine metadata **Tier 3 (Graph Impact Analysis)**: <5s for comprehensive validation **Algorithm**: Graph reachability and impact propagation + ```rust pub async fn graph_impact_analysis( changed_nodes: &[NodeId], graph: &CodeGraph, ) -> ImpactReport { let mut impact = ImpactReport::new(); - + for node in changed_nodes { // Find all downstream dependencies (who uses this?) 
let dependents = graph.reverse_dependencies(node, max_depth=10); - + // Classify severity based on dependency count and criticality let severity = classify_severity(dependents.len(), node.criticality); - + // Find alternative paths if this breaks let alternatives = graph.find_alternative_paths(dependents); - + impact.add_conflict(GraphConflict { symbol: node, affected_count: dependents.len(), @@ -580,12 +621,13 @@ pub async fn graph_impact_analysis( confidence: 0.95, // High confidence from comprehensive analysis }); } - + impact } ``` **Graph Operations** (using custom `DependencyGraph` from `crates/flow/src/incremental/graph.rs`; petgraph was evaluated but not used β€” see Research Task 4): + - Reverse dependency traversal (BFS from changed nodes) - Strongly connected components (detect circular dependencies affected by change) - Shortest path alternative detection (suggest refactoring paths) @@ -593,6 +635,7 @@ pub async fn graph_impact_analysis( **Progressive Streaming**: How results update clients in real-time **WebSocket Protocol** (from Research Task 5): + ```rust pub enum ConflictUpdate { TierOneComplete { conflicts: Vec, timestamp: DateTime }, @@ -607,30 +650,31 @@ pub async fn stream_conflict_detection( ) -> Result<()> { // Tier 1: AST diff (fast) let tier1 = ast_diff(parse(old_code), parse(new_code)); - ws.send(ConflictUpdate::TierOneComplete { + ws.send(ConflictUpdate::TierOneComplete { conflicts: tier1.clone(), timestamp: now(), }).await?; - + // Tier 2: Semantic analysis (medium) let tier2 = semantic_analysis(tier1, parse(old_code), parse(new_code)).await; ws.send(ConflictUpdate::TierTwoRefinement { updated: tier2.clone(), timestamp: now(), }).await?; - + // Tier 3: Graph impact (comprehensive) let tier3 = graph_impact_analysis(&tier2, &load_graph()).await; ws.send(ConflictUpdate::TierThreeComplete { final_report: tier3, timestamp: now(), }).await?; - + Ok(()) } ``` **Client Experience**: + 1. 
**Immediate Feedback (100ms)**: "Potential conflict detected in function signature" (low confidence) 2. **Refined Accuracy (1s)**: "Breaking change confirmed - 15 callers affected" (medium confidence) 3. **Comprehensive Analysis (5s)**: "High severity - critical path affected, 3 alternative refactoring strategies suggested" (high confidence) @@ -638,6 +682,7 @@ pub async fn stream_conflict_detection( **Intelligent Tier Routing**: Not all conflicts need all three tiers. Route based on confidence: + ```rust pub fn should_run_tier2(tier1_result: &[ASTConflict]) -> bool { // Skip semantic analysis if Tier 1 has high confidence @@ -651,6 +696,7 @@ pub fn should_run_tier3(tier2_result: &[SemanticConflict]) -> bool { ``` **Performance Optimization**: + - Parallel tier execution where possible (Tier 2 and 3 can start before Tier 1 completes if working on different symbols) - Cache intermediate results in ReCoco (content-addressed AST nodes reused across tiers) - Early termination if high-confidence result achieved before final tier @@ -701,24 +747,24 @@ conflict is resolved. pub trait GraphStorage: Send + Sync { /// Store graph nodes (symbols) async fn store_nodes(&self, nodes: &[GraphNode]) -> Result<()>; - + /// Store graph edges (relationships) async fn store_edges(&self, edges: &[GraphEdge]) -> Result<()>; - + /// Query nodes by ID async fn get_nodes(&self, ids: &[NodeId]) -> Result>; - + /// Query edges by source/target async fn get_edges(&self, source: NodeId, edge_type: EdgeType) -> Result>; - + /// Graph traversal (1-2 hops, optimized per backend) - async fn traverse(&self, start: NodeId, depth: u32, edge_types: &[EdgeType]) + async fn traverse(&self, start: NodeId, depth: u32, edge_types: &[EdgeType]) -> Result; - + /// Reverse dependencies (who calls/uses this?) 
async fn reverse_deps(&self, target: NodeId, edge_types: &[EdgeType]) -> Result>; - + /// Backend-specific optimization hook async fn optimize_for_query(&self, query: &GraphQuery) -> Result; } @@ -727,7 +773,7 @@ pub trait GraphStorage: Send + Sync { pub trait VectorStorage: Send + Sync { /// Store vector embeddings for semantic search async fn store_vectors(&self, embeddings: &[(NodeId, Vec)]) -> Result<()>; - + /// Similarity search (k-nearest neighbors) async fn search_similar(&self, query: &[f32], k: usize) -> Result>; } @@ -736,10 +782,10 @@ pub trait VectorStorage: Send + Sync { pub trait StorageMigration: Send + Sync { /// Apply schema migration async fn migrate_up(&self, version: u32) -> Result<()>; - + /// Rollback schema migration async fn migrate_down(&self, version: u32) -> Result<()>; - + /// Get current schema version async fn current_version(&self) -> Result; } @@ -748,6 +794,7 @@ pub trait StorageMigration: Send + Sync { **Backend-Specific Optimizations**: **Postgres Implementation**: + ```rust pub struct PostgresStorage { pool: PgPool, @@ -755,7 +802,7 @@ pub struct PostgresStorage { #[async_trait::async_trait] impl GraphStorage for PostgresStorage { - async fn traverse(&self, start: NodeId, depth: u32, edge_types: &[EdgeType]) + async fn traverse(&self, start: NodeId, depth: u32, edge_types: &[EdgeType]) -> Result { // Use recursive CTE for multi-hop queries let query = sqlx::query(r#" @@ -776,10 +823,10 @@ impl GraphStorage for PostgresStorage { .bind(&edge_types) .fetch_all(&self.pool) .await?; - + Ok(TraversalResult::from_rows(query)) } - + async fn optimize_for_query(&self, query: &GraphQuery) -> Result { // PostgreSQL-specific: EXPLAIN ANALYZE for query planning Ok(QueryPlan::UseIndex("idx_edges_source")) @@ -788,6 +835,7 @@ impl GraphStorage for PostgresStorage { ``` **D1 Implementation** (Cloudflare Edge): + ```rust pub struct D1Storage { db: D1Database, @@ -800,7 +848,7 @@ impl GraphStorage for D1Storage { // D1/SQLite: Use PRAGMA for 
performance self.db.exec("PRAGMA journal_mode=WAL").await?; self.db.exec("PRAGMA synchronous=NORMAL").await?; - + // Same recursive CTE as Postgres (SQLite compatible) let query = self.db.prepare(r#" WITH RECURSIVE traversal AS ( @@ -819,10 +867,10 @@ impl GraphStorage for D1Storage { .bind(edge_types)? .all() .await?; - + Ok(TraversalResult::from_d1_rows(query)) } - + async fn optimize_for_query(&self, query: &GraphQuery) -> Result { // D1-specific: Leverage edge CDN caching Ok(QueryPlan::CacheHint { ttl: Duration::from_secs(300) }) @@ -831,6 +879,7 @@ impl GraphStorage for D1Storage { ``` **Qdrant Implementation** (Vector Search): + ```rust pub struct QdrantStorage { client: QdrantClient, @@ -847,18 +896,18 @@ impl VectorStorage for QdrantStorage { .with_payload(payload!({ "node_id": id.to_string() })) }) .collect(); - + self.client .upsert_points(&self.collection, points, None) .await?; Ok(()) } - + async fn search_similar(&self, query: &[f32], k: usize) -> Result> { let results = self.client .search_points(&self.collection, query.to_vec(), k as u64, None, None, None) .await?; - + Ok(results.result.into_iter() .map(|p| (NodeId::from(p.payload["node_id"].as_str().unwrap()), p.score)) .collect()) @@ -869,6 +918,7 @@ impl VectorStorage for QdrantStorage { **Migration Strategy**: **Schema Versioning**: + ```sql -- migrations/001_initial_schema.sql CREATE TABLE schema_version (version INTEGER PRIMARY KEY); @@ -900,11 +950,12 @@ DELETE FROM schema_version WHERE version = 1; ``` **Migration Execution**: + ```rust impl StorageMigration for PostgresStorage { async fn migrate_up(&self, version: u32) -> Result<()> { let migration = load_migration(version)?; - + // Execute in transaction let mut tx = self.pool.begin().await?; sqlx::query(&migration.up_sql).execute(&mut *tx).await?; @@ -913,13 +964,13 @@ impl StorageMigration for PostgresStorage { .execute(&mut *tx) .await?; tx.commit().await?; - + Ok(()) } - + async fn migrate_down(&self, version: u32) -> Result<()> { let 
migration = load_migration(version)?; - + let mut tx = self.pool.begin().await?; sqlx::query(&migration.down_sql).execute(&mut *tx).await?; sqlx::query("UPDATE schema_version SET version = $1") @@ -927,7 +978,7 @@ impl StorageMigration for PostgresStorage { .execute(&mut *tx) .await?; tx.commit().await?; - + Ok(()) } } @@ -936,6 +987,7 @@ impl StorageMigration for PostgresStorage { **Resilience Patterns**: **Connection Pooling**: + ```rust pub struct PostgresStorage { pool: PgPool, // sqlx connection pool @@ -955,6 +1007,7 @@ impl PostgresStorage { ``` **Retry Logic** (exponential backoff): + ```rust pub async fn with_retry(operation: F) -> Result where @@ -976,6 +1029,7 @@ where ``` **Circuit Breaker**: + ```rust pub struct CircuitBreaker { state: Arc>, diff --git a/specs/001-realtime-code-graph/research/PROVENANCE_ENHANCEMENT_SPEC.md b/specs/001-realtime-code-graph/research/PROVENANCE_ENHANCEMENT_SPEC.md index 4dadfa6..1945f7f 100644 --- a/specs/001-realtime-code-graph/research/PROVENANCE_ENHANCEMENT_SPEC.md +++ b/specs/001-realtime-code-graph/research/PROVENANCE_ENHANCEMENT_SPEC.md @@ -26,7 +26,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use thread_utils::RapidMap; /// Represents the version of source code being analyzed #[derive(Debug, Clone, Serialize, Deserialize)] @@ -48,7 +48,7 @@ pub struct SourceVersion { pub version_timestamp: DateTime, /// Additional context (branch name, tag, storage class, etc.) 
- pub metadata: HashMap, + pub metadata: RapidMap, } /// Represents a single step in the analysis pipeline @@ -82,7 +82,7 @@ pub struct LineageRecord { pub cache_hit: bool, /// Operation-specific metadata - pub metadata: HashMap, + pub metadata: RapidMap, } /// Types of operations in the analysis pipeline @@ -378,7 +378,7 @@ impl GraphEdge { use crate::ConflictPrediction; use crate::provenance::{Provenance, LineageRecord}; use chrono::{DateTime, Utc}; -use std::collections::HashMap; +use thread_utils::RapidMap; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ConflictProvenance { @@ -718,33 +718,38 @@ pub struct TraceConflictResponse { ### 4.1 Core Tasks -**T079.1: Provenance Module Creation** +#### T079.1: Provenance Module Creation + - File: `crates/thread-graph/src/provenance.rs` - Define: `SourceVersion`, `LineageRecord`, `OperationType`, `EdgeCreationMethod`, `Provenance` - Tests: Unit tests for provenance type conversions - Time estimate: 3-4 hours -**T079.2: GraphNode/GraphEdge Updates** +#### T079.2: GraphNode/GraphEdge Updates + - File: `crates/thread-graph/src/node.rs` and `edge.rs` - Add provenance fields (with `Option` for backward compat) - Implement helper methods (`get_lineage`, `should_reanalyze`, etc.) 
- Tests: Serialization tests, schema validation - Time estimate: 2-3 hours -**T079.3: Conflict Provenance Module** +#### T079.3: Conflict Provenance Module + - File: `crates/thread-conflict/src/provenance.rs` - Define: `ConflictProvenance`, `TierResults`, `UpstreamChange` - Link to conflict detection results - Time estimate: 2-3 hours -**T079.4: Database Schema & Migrations** +#### T079.4: Database Schema & Migrations + - Files: `migrations/postgres/003_*.sql` and `migrations/d1/003_*.sql` - Create: All provenance tables - Implement: Migration runner logic - Tests: Schema validation - Time estimate: 3-4 hours -**T079.5: Storage Implementation** +#### T079.5: Storage Implementation + - Files: `crates/thread-storage/src/{postgres,d1}.rs` - Implement: `ProvenanceStore` trait (new file: `src/provenance.rs`) - Add: Node/edge persistence with provenance @@ -752,21 +757,24 @@ pub struct TraceConflictResponse { - Tests: Integration tests with real database - Time estimate: 4-5 hours -**T079.6: Provenance Query API** +#### T079.6: Provenance Query API + - File: `crates/thread-api/src/provenance_api.rs` (new file) - Implement: `ProvenanceQuery` trait methods - Add: Query handler implementations - Tests: Query correctness, performance - Time estimate: 5-6 hours -**T079.7: CocoIndex Integration** +#### T079.7: CocoIndex Integration + - File: `crates/thread-services/src/dataflow/provenance_collector.rs` (new) - Create: `ProvenanceCollector` that extracts ExecutionRecords - Wire: Collection during flow execution - Tests: End-to-end provenance flow - Time estimate: 5-6 hours -**T079.8: Documentation & Examples** +#### T079.8: Documentation & Examples + - Update: `crates/thread-graph/src/lib.rs` documentation - Add: Examples of provenance queries - Create: Debugging guide ("How to trace why a conflict was detected?") @@ -780,7 +788,7 @@ pub struct TraceConflictResponse { ### 4.3 Dependency Graph -``` +```plaintext T079.1 (Provenance types) ↓ T079.2 (GraphNode/Edge updates) ← 
Depends on T079.1 @@ -804,18 +812,21 @@ T079.8 (Documentation) ← Depends on all above ### 5.1 Phased Rollout -**Phase 1: Optional Provenance** +#### Phase 1: Optional Provenance + - All provenance fields are `Option` - Existing nodes continue to work - New analyses automatically include provenance - No schema change required immediately -**Phase 2: Migration** +#### Phase 2: Migration + - Backfill historical nodes (lazy evaluation) - Run migration script: `scripts/backfill_provenance.sql` - Generates minimal provenance for existing nodes -**Phase 3: Required Provenance** +#### Phase 3: Required Provenance + - After Phase 2, make provenance required - All queries validate provenance present - Better audit trail and debugging @@ -860,25 +871,29 @@ WHERE NOT EXISTS ( ### 6.2 Test Scenarios -**Scenario 1: Basic Provenance** +#### Scenario 1: Basic Provenance + - Parse a file - Store node with provenance - Query: Retrieve complete lineage - Verify: All stages present, timestamps match -**Scenario 2: Conflict Audit** +#### Scenario 2: Conflict Audit + - Detect a conflict - Store with conflict provenance - Query: Get analysis trace for conflict - Verify: All tiers documented, timing correct -**Scenario 3: Incremental Update** +#### Scenario 3: Incremental Update + - Change one source file - Use provenance to identify affected nodes - Re-analyze only affected nodes - Verify: Cache hits for unaffected nodes -**Scenario 4: Cross-Repository** +#### Scenario 4: Cross-Repository + - Index two repositories - Query provenance for cross-repo dependency - Verify: Both source versions tracked @@ -888,22 +903,27 @@ WHERE NOT EXISTS ( ## 7. 
Recommended Rollout Timeline **Week 1**: + - T079.1-T079.3: Define all provenance types (parallel) - Code review and approval **Week 2**: + - T079.4-T079.5: Database and storage (sequential) - Integration testing **Week 3**: + - T079.6: Query API (depends on storage completion) - API testing **Week 4**: + - T079.7: CocoIndex integration (depends on foundation complete) - End-to-end testing **Week 5**: + - T079.8: Documentation and cleanup - QA and validation diff --git a/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_INDEX.md b/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_INDEX.md index c71289b..4a8dada 100644 --- a/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_INDEX.md +++ b/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_INDEX.md @@ -17,14 +17,17 @@ SPDX-License-Identifier: AGPL-3.0-or-later ## Research Deliverables ### 1. RESEARCH_SUMMARY.md (START HERE) + **Purpose**: Executive summary and quick reference **Length**: ~10 pages **Best For**: + - Decision makers and stakeholders - 30-minute overview needed - Understanding core findings quickly **Key Sections**: + - Quick Findings (the answer to the research question) - Executive Summary (context and importance) - Technical Details (CocoIndex architecture) @@ -37,29 +40,33 @@ SPDX-License-Identifier: AGPL-3.0-or-later --- ### 2. PROVENANCE_RESEARCH_REPORT.md (COMPREHENSIVE ANALYSIS) + **Purpose**: Complete technical research with full analysis **Length**: ~40 pages **Best For**: + - Technical leads and architects - Deep understanding of CocoIndex capabilities - Understanding trade-offs and decisions - Research validation and verification **Key Sections**: + - Executive Summary (findings summary) - 1. CocoIndex Native Provenance Capabilities (detailed) -- 2. Current T079 Implementation Scope (what's missing) -- 3. Comparative Analysis (cocoindex vs T079) -- 4. Enhanced FR-014 Implementation (with code examples) -- 5. Use Cases Enabled (concrete benefits) -- 6. 
Implementation Recommendations -- 7. Missed Opportunities Summary -- 8. Recommended Implementation Order -- 9. Architecture Diagrams -- 10. Conclusion and Next Steps -- 11. Research Sources and References +- 1. Current T079 Implementation Scope (what's missing) +- 1. Comparative Analysis (cocoindex vs T079) +- 1. Enhanced FR-014 Implementation (with code examples) +- 1. Use Cases Enabled (concrete benefits) +- 1. Implementation Recommendations +- 1. Missed Opportunities Summary +- 1. Recommended Implementation Order +- 1. Architecture Diagrams +- 1. Conclusion and Next Steps +- 1. Research Sources and References **Contains**: + - Full comparative matrix (CocoIndex vs T079) - Use case walkthroughs with examples - Risk mitigation strategies @@ -72,48 +79,52 @@ SPDX-License-Identifier: AGPL-3.0-or-later --- ### 3. PROVENANCE_ENHANCEMENT_SPEC.md (IMPLEMENTATION GUIDE) + **Purpose**: Detailed specification for T079 implementation **Length**: ~30 pages **Best For**: + - Implementation team members - Software architects - Database schema designers - API designers **Key Sections**: + - 1. Data Model Enhancements - New provenance types (SourceVersion, LineageRecord, etc.) - Updated GraphNode structure - Updated GraphEdge structure - Conflict provenance types -- 2. Storage Schema Changes +- 1. Storage Schema Changes - PostgreSQL migrations - D1 (Cloudflare) schema -- 3. API Additions +- 1. API Additions - ProvenanceQuery trait - RPC type extensions -- 4. Implementation Tasks (Updated T079) +- 1. Implementation Tasks (Updated T079) - Task breakdown: T079.1 through T079.8 - Effort estimates - Dependency graph -- 5. Backward Compatibility Strategy +- 1. Backward Compatibility Strategy - Phased rollout approach - Migration scripts -- 6. Success Validation +- 1. Success Validation - Metrics to track - Test scenarios -- 7. Recommended Rollout Timeline +- 1. Recommended Rollout Timeline - Week-by-week schedule -- 8. Risk Mitigation +- 1. 
Risk Mitigation **Contains**: + - Complete Rust code examples - SQL migration scripts - Task list with time estimates @@ -127,6 +138,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later --- ### 4. PROVENANCE_RESEARCH_INDEX.md (THIS FILE) + **Purpose**: Navigation guide for all research documents **Contains**: This document - how to use all the research @@ -135,6 +147,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later ## How to Use These Documents ### For Decision Makers + 1. **Start**: RESEARCH_SUMMARY.md 2. **Focus on**: - "Quick Findings" section @@ -144,6 +157,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later 4. **Outcome**: Understanding of findings and recommended action ### For Technical Leads + 1. **Start**: RESEARCH_SUMMARY.md (quick context) 2. **Deep Dive**: PROVENANCE_RESEARCH_REPORT.md 3. **Focus on**: @@ -154,6 +168,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later 5. **Outcome**: Understanding of technical approach and decisions ### For Implementation Team + 1. **Start**: RESEARCH_SUMMARY.md (15 min overview) 2. **Reference**: PROVENANCE_RESEARCH_REPORT.md (understand "why") 3. **Implement using**: PROVENANCE_ENHANCEMENT_SPEC.md @@ -165,6 +180,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later 6. **Outcome**: Production-ready implementation ### For Architects + 1. **Start**: RESEARCH_SUMMARY.md (quick context) 2. **Analysis**: PROVENANCE_RESEARCH_REPORT.md 3. **Focus on**: @@ -180,25 +196,30 @@ SPDX-License-Identifier: AGPL-3.0-or-later ## Research Question & Answer ### Question + **How can CocoIndex's native provenance tracking enhance FR-014 ("System MUST track analysis provenance showing which data source, version, and timestamp each graph node originated from") compared to T079's current "repository_id only" approach?** ### Answer (Quick) + CocoIndex has sophisticated automatic lineage tracking that captures source versions, transformation pipelines, cache status, execution timeline, and upstream dependencies. 
T079's current scope (repository_id only) misses 80% of valuable provenance data. By leveraging CocoIndex's native capabilities, we can fully implement FR-014, enable incremental update optimization, debug conflict detection, and create complete audit trails - with only slightly more effort than the current approach. ### Answer (Extended) -**See RESEARCH_SUMMARY.md "Key Findings" section for full details** + +#### See RESEARCH_SUMMARY.md "Key Findings" section for full details --- ## Key Findings at a Glance ### Finding 1: CocoIndex Architecture Supports Provenance + - βœ“ Each stage of the pipeline is tracked automatically - βœ“ Input/output hashes available - βœ“ Execution times and cache status captured - βœ“ Queryable via ExecutionRecords API ### Finding 2: Current T079 Scope Gap + - βœ“ Adds: repository_id - βœ— Missing: source_version - βœ— Missing: source_timestamp @@ -206,7 +227,8 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers - βœ— Missing: cache status - βœ— Missing: upstream_hashes -### Finding 3: Enhanced Provenance Enables... +### Finding 3: Enhanced Provenance Enables + - Conflict detection debugging (which tiers ran?) - Cache effectiveness validation (cache hits really happening?) - Incremental update optimization (which nodes to re-analyze?) @@ -214,6 +236,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers - Stale analysis detection (is this analysis fresh?) 
### Finding 4: Effort & Value Trade-off + - **Effort**: 25-35 hours (1-2 weeks) - **Value**: Complete FR-014 compliance + incremental optimization + debugging tools - **Risk**: Low (backward compatible, phased approach) @@ -224,24 +247,28 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Implementation Roadmap ### Phase 1: Foundation (Week 1) + - Define provenance types - Update GraphNode/GraphEdge - **Tasks**: T079.1, T079.2, T079.3 - **Effort**: 8-10 hours ### Phase 2: Storage (Week 2) + - Create database migrations - Implement storage persistence - **Tasks**: T079.4, T079.5 - **Effort**: 8-10 hours ### Phase 3: Collection (Week 3) + - Implement query APIs - Build CocoIndex integration - **Tasks**: T079.6, T079.7 - **Effort**: 10-12 hours ### Phase 4: Validation (Week 4) + - Documentation and examples - Testing and validation - **Tasks**: T079.8 @@ -254,6 +281,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Key Documents Referenced ### From the Codebase + - `specs/001-realtime-code-graph/spec.md` - FR-014 requirement - `specs/001-realtime-code-graph/data-model.md` - Current schema - `specs/001-realtime-code-graph/tasks.md` - T079 task @@ -263,6 +291,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers - `CLAUDE.md` - Project architecture ### From This Research + - `RESEARCH_SUMMARY.md` - Executive summary - `PROVENANCE_RESEARCH_REPORT.md` - Complete analysis - `PROVENANCE_ENHANCEMENT_SPEC.md` - Implementation spec @@ -273,7 +302,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Quick Reference: What Each Document Answers | Question | Answer Location | -|----------|-----------------| +| ---------- | ----------------- | | What did you find? | RESEARCH_SUMMARY.md - Quick Findings | | Why does this matter? | RESEARCH_SUMMARY.md - Why It Matters | | What's the recommendation? 
| RESEARCH_SUMMARY.md - Recommendations | @@ -291,22 +320,26 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Recommended Reading Order ### If You Have 30 Minutes + 1. RESEARCH_SUMMARY.md - Read all sections 2. Decision: Accept or decline enhanced T079 scope ### If You Have 90 Minutes + 1. RESEARCH_SUMMARY.md - Read all 2. PROVENANCE_RESEARCH_REPORT.md - Sections 1-4 3. PROVENANCE_ENHANCEMENT_SPEC.md - Section 4 (task list) 4. Decision and preliminary planning ### If You Have 3+ Hours + 1. RESEARCH_SUMMARY.md - Complete 2. PROVENANCE_RESEARCH_REPORT.md - Complete 3. PROVENANCE_ENHANCEMENT_SPEC.md - Complete 4. Detailed implementation planning ### If You're Implementing + 1. RESEARCH_SUMMARY.md - 15 minute overview 2. PROVENANCE_RESEARCH_REPORT.md - Sections 4-5 (why this matters) 3. PROVENANCE_ENHANCEMENT_SPEC.md - Section 1-4 (what to code) @@ -317,7 +350,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Key Statistics | Metric | Value | -|--------|-------| +| -------- | ------- | | Research Duration | 4+ hours | | Comprehensive Report | 40 pages | | Implementation Spec | 30 pages | @@ -333,29 +366,35 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Next Steps After Reading ### Step 1: Understand (30 min) + - Read RESEARCH_SUMMARY.md - Understand key findings ### Step 2: Decide (30 min) + - Accept expanded T079 scope (recommended) - Or: Justify sticking with repository_id only ### Step 3: Plan (1-2 hours) + - Assign T079.1-T079.8 tasks to team members - Schedule 4-week implementation phase - Allocate resources ### Step 4: Prepare (1 hour) + - Review PROVENANCE_ENHANCEMENT_SPEC.md - Identify technical questions - Prepare development environment ### Step 5: Implement (1-2 weeks) + - Follow phased approach - Reference spec during coding - Validate with test scenarios ### Step 6: Validate (3-5 days) + - Run test scenarios - Verify incremental updates - 
Confirm audit trails work @@ -376,6 +415,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers **Research**: Comprehensive analysis of CocoIndex provenance capabilities **Sources**: + - CocoIndex architectural documentation - Thread project specifications and code - Real-Time Code Graph Intelligence feature requirements @@ -387,6 +427,7 @@ CocoIndex has sophisticated automatic lineage tracking that captures source vers ## Contact & Questions For questions about this research: + 1. **Quick answers**: RESEARCH_SUMMARY.md FAQ section 2. **Technical details**: PROVENANCE_RESEARCH_REPORT.md relevant sections 3. **Implementation**: PROVENANCE_ENHANCEMENT_SPEC.md task descriptions @@ -394,6 +435,6 @@ For questions about this research: --- -**End of Index** +## End of Index Start with **RESEARCH_SUMMARY.md** for a quick overview, or choose your document above based on your role and available time. diff --git a/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_REPORT.md b/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_REPORT.md index 230332e..3465ee7 100644 --- a/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_REPORT.md +++ b/specs/001-realtime-code-graph/research/PROVENANCE_RESEARCH_REPORT.md @@ -32,6 +32,7 @@ This research evaluates CocoIndex's native provenance tracking capabilities and ### Recommendation **Expand T079 scope** from "Add repository_id" to comprehensive provenance implementation leveraging CocoIndex's native capabilities. 
This enables: + - Enhanced conflict detection with full data lineage analysis - Audit trails showing exactly which analysis stages contributed to each conflict prediction - Deterministic incremental updates (only re-analyze when relevant upstream data changes) @@ -45,7 +46,7 @@ This research evaluates CocoIndex's native provenance tracking capabilities and From the deep architectural research (deep-architectural-research.md), CocoIndex's dataflow orchestration inherently includes provenance tracking: -``` +```plaintext CocoIndex Dataflow Structure: β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ Sources β”‚ ← Track: which source, version, access time @@ -58,6 +59,7 @@ CocoIndex Dataflow Structure: ``` **Critical Feature**: CocoIndex's "content-addressed fingerprinting" automatically creates lineage chains: + - Input hash + logic hash + dependency versions β†’ Transformation output fingerprint - Dependency graph computation identifies which upstream changes invalidate which artifacts - Only recompute invalidated nodes (core to >90% cache hit rate requirement) @@ -65,7 +67,8 @@ CocoIndex Dataflow Structure: ### 1.2 Automatic Provenance Metadata at Each Stage #### Source-Level Provenance -``` + +```plaintext CocoIndex Source Tracking: β”œβ”€ Source Type: LocalFiles, Git, S3, etc. 
β”œβ”€ Source Identifier: Path, URL, bucket name @@ -76,6 +79,7 @@ CocoIndex Source Tracking: ``` **Example for Thread's LocalFiles Source**: + ```rust pub struct LocalFilesSource { paths: Vec, @@ -91,7 +95,8 @@ pub struct LocalFilesSource { ``` #### Transformation-Level Provenance -``` + +```plaintext CocoIndex Function Tracking: β”œβ”€ Function ID: "thread_parse_function" β”œβ”€ Function Version: "1.0.0" (language: thread-ast-engine) @@ -114,6 +119,7 @@ CocoIndex Function Tracking: ``` **Thread Integration Point**: + ```rust // When ThreadParseFunction executes as CocoIndex operator: impl SimpleFunctionExecutor for ThreadParseExecutor { @@ -142,7 +148,8 @@ impl SimpleFunctionExecutor for ThreadParseExecutor { ``` #### Target-Level Provenance -``` + +```plaintext CocoIndex Target Tracking: β”œβ”€ Target Type: PostgresTarget, D1Target, QdrantTarget β”œβ”€ Write Timestamp: When data was persisted @@ -159,7 +166,7 @@ CocoIndex Target Tracking: CocoIndex automatically constructs full lineage chains across multiple transformation stages: -``` +```plaintext Complete Lineage Chain (Thread Real-Time Code Graph Example): File "main.rs" (Git repo, commit abc123, timestamp 2026-01-11T10:30:00Z) @@ -217,12 +224,13 @@ pub struct ExecutionRecord { } pub struct DependencyGraph { - nodes: HashMap, + nodes: thread_utils::RapidMap, edges: Vec<(String, String)>, // operation -> operation dependencies } ``` This means CocoIndex can answer: + - "What's the complete lineage for node X?" - "Which operations were executed to produce Y?" - "When was Z computed and from what input?" @@ -234,7 +242,7 @@ This means CocoIndex can answer: ### 2.1 T079 Task Definition (from tasks.md) -``` +```plaintext T079 [US3] Add repository_id to GraphNode and GraphEdge for source attribution ``` @@ -257,6 +265,7 @@ pub struct GraphNode { ``` **What T079 adds** (proposed implementation): + ```rust pub struct GraphNode { // ... existing fields ... 
@@ -273,25 +282,30 @@ pub struct GraphNode { ### 2.3 Limitations of Current T079 Approach **Repository Attribution Only**: + - Answers: "Which repository did this node come from?" - Doesn't answer: "Which data source version? When? Why?" **Missing Transformation Context**: + - No tracking of which analysis stages created the node - Can't trace: "Was this conflict detected by Tier 1, 2, or 3 analysis?" - Misses: "Did cache miss cause re-analysis?" **No Temporal Provenance**: + - No timestamp of when analysis occurred - Can't answer: "Is this analysis stale?" - Breaks: Incremental update efficiency **Upstream Data Lineage Invisible**: + - If source file changed, can't efficiently determine which nodes are invalidated - Content-addressed caching becomes less effective - Incremental updates may re-analyze unnecessarily **Conflict Audit Trail Missing**: + - FR-014 requires tracking "which data source, version, and timestamp" - T079 only provides repository_id, missing version and timestamp - Insufficient for FR-018 (audit and learning) @@ -303,7 +317,7 @@ pub struct GraphNode { ### 3.1 Comparison Matrix | Aspect | T079 (Current) | CocoIndex Native | Need for Code Graph | -|--------|---|---|---| +| -------- | --- | --- | --- | | **Source Attribution** | βœ“ repository_id | βœ“ Source ID + type | FR-014 βœ“ | | **Source Version** | βœ— | βœ“ Git commit, S3 ETag | FR-014 βœ“ | | **Source Timestamp** | βœ— | βœ“ Access timestamp | FR-014 βœ“ | @@ -317,8 +331,9 @@ pub struct GraphNode { ### 3.2 CocoIndex Advantages for Code Graph Provenance -**1. Automatic at Source Layer** -``` +#### 1. Automatic at Source Layer + +```plaintext CocoIndex LocalFilesSource automatically captures: - File path (identity) - File modification time (version timestamp) @@ -327,8 +342,9 @@ CocoIndex LocalFilesSource automatically captures: - Filesystem attributes (metadata context) ``` -**2. Automatic at Transformation Layer** -``` +#### 2. 
Automatic at Transformation Layer + +```plaintext For each Thread operator (ThreadParseFunction, ThreadExtractSymbols, etc.): - Input: what file/AST hash was processed - Operation: which parser/extractor, what version @@ -337,8 +353,9 @@ For each Thread operator (ThreadParseFunction, ThreadExtractSymbols, etc.): - Output: what hash was produced ``` -**3. Automatic at Target Layer** -``` +#### 3. Automatic at Target Layer + +```plaintext For PostgresTarget/D1Target: - Write timestamp: precisely when persisted - Transaction metadata: ACID context @@ -346,8 +363,9 @@ For PostgresTarget/D1Target: - Write latency: performance metrics ``` -**4. Queryable Relationship** -``` +#### 4. Queryable Relationship + +```plaintext After execution, can query: - "Show me execution record for node X's lineage" - "What was the input hash that produced node Y?" @@ -409,7 +427,7 @@ pub struct LineageRecord { pub executed_at: DateTime, pub duration_ms: u64, pub success: bool, - pub metadata: HashMap, // Language, config version, etc. + pub metadata: thread_utils::RapidMap, // Language, config version, etc. } pub enum OperationType { @@ -513,7 +531,8 @@ pub struct UpstreamChange { ### 5.1 Incremental Update Optimization (SC-INCR-001) **Without Full Provenance** (Current T079): -``` + +```plaintext File X changes: - Mark all nodes in file X as dirty - Possibly: mark all reverse dependencies as dirty @@ -523,7 +542,8 @@ File X changes: ``` **With Full Provenance** (CocoIndex native): -``` + +```plaintext File X changes (new hash): - CocoIndex tracks: upstream_hashes for ALL nodes - Find nodes where upstream contains old file hash @@ -535,14 +555,16 @@ File X changes (new hash): ### 5.2 Conflict Audit Trail (FR-018) **Current**: -``` + +```plaintext Conflict detected: "function A modified" Question: How was this detected? Why? When? Answer: (No information) ``` **With Enhanced Provenance**: -``` + +```plaintext Conflict detected: 2026-01-11T10:30:15Z Analysis pipeline: 1. 
Parse (Tier 1): 15ms, file hash abc123 @@ -563,7 +585,8 @@ If investigation needed: **Scenario**: Conflict detector reports an issue that manual inspection disagrees with **With Full Provenance**: -``` + +```plaintext Question: "Why was this marked as a conflict?" Answer (from lineage records): @@ -585,7 +608,8 @@ Investigation path: ### 5.4 Cache Effectiveness Analysis (SC-CACHE-001) **With Provenance Tracking**: -``` + +```plaintext Query: "Why did cache miss for this node?" Answer: @@ -605,7 +629,8 @@ This proves: ### 5.5 Cross-Repository Dependency Transparency **T079 Current**: -``` + +```plaintext Node "process_payment" repository_id: "stripe-integration-service" @@ -614,7 +639,8 @@ Cannot answer: "Is this fresh from latest code? When?" ``` **With Full Provenance**: -``` + +```plaintext Node "process_payment" repository_id: "stripe-integration-service" source_version: SourceVersion { @@ -721,6 +747,7 @@ pub async fn execute_code_analysis_flow( **Concern**: Adding provenance to existing nodes **Solution**: + - Mark provenance fields as `Option` initially - Provide migration for existing nodes (backfill with minimal provenance) - New analyses automatically get full provenance @@ -746,7 +773,7 @@ pub struct GraphNode { ### 7.1 What T079 Misses | Missing Feature | CocoIndex Capability | Value | -|---|---|---| +| --- | --- | --- | | **Source Version Tracking** | Native SourceVersion tracking | FR-014 completeness | | **Timestamp Precision** | Per-operation execution times | Audit trail quality | | **Analysis Pipeline Transparency** | Complete lineage records | Debugging conflicts | @@ -760,6 +787,7 @@ pub struct GraphNode { If T079 implemented as-is (repository_id only): **Problems**: + 1. βœ— Can't prove cache is working correctly (missing cache metadata) 2. βœ— Can't audit why conflict was detected (missing tier execution records) 3. 
βœ— Can't efficiently invalidate caches on upstream change (missing upstream lineage) @@ -767,6 +795,7 @@ If T079 implemented as-is (repository_id only): 5. βœ— Doesn't fully satisfy FR-014 (missing version and timestamp) **Rework Required Later**: + - Phase 1: Implement repository_id (T079 as-is) - Phase 2: Add source versioning (more work, schema changes) - Phase 3: Add lineage tracking (significant refactor) @@ -780,22 +809,26 @@ If T079 implemented as-is (repository_id only): ### 8.1 Phased Approach to Minimize Risk -**Phase 1: Foundation (Week 1)** +#### Phase 1: Foundation (Week 1) + - Implement basic `SourceVersion` struct (Git commit, S3 ETag, local timestamp) - Add `source_version` and `source_timestamp` fields to GraphNode - Update T079 scope document -**Phase 2: CocoIndex Integration (Week 2-3)** +#### Phase 2: CocoIndex Integration (Week 2-3) + - Build `ProvenanceCollector` that extracts ExecutionRecords - Implement `LineageRecord` structure - Wire CocoIndex execution data into node storage -**Phase 3: Queryable Provenance (Week 4)** +#### Phase 3: Queryable Provenance (Week 4) + - Implement `ProvenanceQuery` API - Add provenance table migrations - Build debugging tools (show lineage, trace conflicts) -**Phase 4: Validation (Week 5)** +#### Phase 4: Validation (Week 5) + - Verify incremental updates work correctly - Confirm cache invalidation matches lineage - Validate conflict audit trail completeness @@ -803,23 +836,27 @@ If T079 implemented as-is (repository_id only): ### 8.2 Parallel Work Streams **T079.1 + T079.2**: Can happen in parallel + - T079.1: Graph structure changes (module organization) - T079.2: CocoIndex integration (different crate) **T079.3**: Depends on T079.1 + T079.2 + - Needs provenance data to store **T079.4**: Depends on T079.3 + - Needs schema for persistence **T079.5**: Depends on all above + - Needs all pieces in place to query --- ## 9. 
Architecture Diagram: Enhanced Provenance -``` +```plaintext File System / Git / Cloud Source β”‚ β”œβ”€ Source: LocalFiles, Git, S3 @@ -902,12 +939,14 @@ Database: nodes, edges, provenance tables ### 10.2 Impact on Other Features **Helps**: + - SC-INCR-001/002: Incremental updates can be more precise - SC-CACHE-001: Cache effectiveness becomes provable - FR-018: Audit trail and learning from past conflicts - FR-014: Full compliance (not just repository_id) **Independent Of**: + - Real-time performance (FR-005, FR-013) - Conflict prediction accuracy (SC-002) - Multi-source support (US3) @@ -917,12 +956,14 @@ Database: nodes, edges, provenance tables **Risk**: Expanding scope increases implementation complexity **Mitigation**: + - CocoIndex provides most of the data automatically - Phased approach (foundation β†’ integration β†’ validation) - Backward compatible with optional fields initially **Risk**: CocoIndex API changes **Mitigation**: + - ExecutionRecords API is stable (core dataflow concept) - Even if API changes, basic capability preserved - Worst case: store less detailed provenance @@ -934,17 +975,20 @@ Database: nodes, edges, provenance tables ## 11. 
Research Sources and References ### 11.1 CocoIndex Documentation + - deep-architectural-research.md: Complete CocoIndex architecture analysis - research.md Task 1: CocoIndex Integration Architecture - research.md Task 8: Storage Backend Abstraction Pattern ### 11.2 Thread Real-Time Code Graph + - spec.md: FR-014 provenance requirement - data-model.md: GraphNode, GraphEdge structures - tasks.md: T079 current scope - contracts/rpc-types.rs: API definitions ### 11.3 Key Architectural Documents + - CLAUDE.md: Project architecture and CocoIndex integration - Constitution v2.0.0: Service-library architecture principles diff --git a/specs/001-realtime-code-graph/spec.md b/specs/001-realtime-code-graph/spec.md index 347d756..6dd96b9 100644 --- a/specs/001-realtime-code-graph/spec.md +++ b/specs/001-realtime-code-graph/spec.md @@ -15,7 +15,7 @@ SPDX-License-Identifier: AGPL-3.0-or-later ## Related Documents | Document | Location | Role | -|----------|----------|------| +| ---------- | ---------- | ------ | | Semantic Classification Spec | [`docs/architecture/SEMANTIC_CLASSIFICATION_SPEC.md`](../../docs/architecture/SEMANTIC_CLASSIFICATION_SPEC.md) | Canonical implementation reference for `thread-definitions` β€” classifier internals, 8-stage lookup pipeline, data schemas, scoring model, language-agnostic query design | | AI Knowledge Layer Design | [`docs/architecture/AI_KNOWLEDGE_LAYER_DESIGN.md`](../../docs/architecture/AI_KNOWLEDGE_LAYER_DESIGN.md) | Background architectural proposal for the multi-resolution knowledge layer (L0–L4); predates the classifier port proposal | | Implementation Plan | [`specs/001-realtime-code-graph/plan.md`](./plan.md) | Phased implementation plan, crate breakdown, dependency graph | @@ -51,10 +51,12 @@ Two developers are working on different features that unknowingly modify overlap **Acceptance Scenarios**: -1. 
**Given** two developers editing different files, **When** developer A saves a modified file locally that affects a function call chain modified by developer B, **Then** system detects potential conflict and notifies both developers within 5 seconds of the save event +1. **Given** two developers editing different files **and developer B's active working changes are visible to the system** (either committed to the shared baseline, or available via Delta-sharing within the same active session), **When** developer A saves a file (file system write event detected by the `thread-indexer` watcher) that affects a function call chain modified by developer B, **Then** system detects potential conflict and notifies both developers within 5 seconds of the save event 2. **Given** a developer modifying a widely-used API, **When** the change would break 15 downstream callers, **Then** system lists all affected callers with severity ratings before commit 3. **Given** asynchronous work across timezones, **When** developer A's changes conflict with developer B's 8-hour-old WIP branch, **Then** system provides merge preview showing exactly what will conflict +> **Trigger Note**: Conflict detection is triggered by file system watcher events (same mechanism as FR-013 real-time propagation). No explicit user action is required. Developers working in any editor that saves to disk automatically participate in real-time conflict detection. This applies to OSS CLI deployment; edge deployment conflict detection (commercial) uses the same watcher event as the source trigger, forwarded to the Container analysis service. + --- ### User Story 3 - Multi-Source Code Intelligence (Priority: P3) @@ -85,20 +87,24 @@ When a conflict is predicted, the system suggests resolution strategies based on 1. **Given** a detected conflict between two changes, **When** both changes are analyzed semantically, **Then** system provides resolution strategy with confidence score and reasoning 2. 
**Given** conflicting changes to the same function, **When** one change modifies logic and other adds logging, **Then** system recommends specific merge order and identifies safe integration points -3. **Given** breaking API change conflict, **When** system analyzes impact, **Then** it suggests adapter pattern or migration path with code examples +3. **Given** breaking API change conflict, **When** system analyzes impact, **Then** it suggests adapter pattern or migration path with code examples formatted as fenced code blocks. Code examples are language-specific compilable snippets where the conflict context allows deterministic generation (known language, clear symbol signatures); structured pseudocode with inline comments otherwise. + +> **Resolution Format Note**: AI-generated resolution suggestions use fenced code blocks throughout. The system targets language-specific compilable code for common patterns (adapter, migration, signature update) where the AST context provides sufficient precision. When the AI cannot determine a compilable form with high confidence, structured pseudocode with explanatory comments is used. The specific AI integration (Workers AI, external LLM, or local model) is a commercial implementation detail β€” this spec defines the output format contract only. --- ### Edge Cases -- What happens when indexing a codebase larger than available memory (1M+ files)? -- How does the system handle circular dependencies in the code graph? -- What occurs when two data sources contain the same file with different versions? -- How does conflict prediction work when one developer is offline for extended periods? -- What happens if the underlying analysis engine crashes mid-query? -- How does the system handle generated code files that change frequently? -- What occurs when database connection is lost during real-time updates? -- How does the system manage version drift between local and cloud deployments? 
+| Edge Case | Handled By | +| ----------- | ------------ | +| Codebase larger than available memory (1M+ files) | FR-022 (memory governance, adaptive batching), FR-024 (partial graph results) | +| Circular dependencies in the code graph | FR-025 (cycle detection, depth-limiting) | +| Two data sources contain the same file with different versions | FR-004 (CAS β€” same content hash = same entry; different content = different entries, both retained) | +| Developer offline for extended periods (conflict prediction) | SC-002 deferred to commercial scope (thread-conflict); FR-017 Overlay Graph tracks committed baseline for offline comparison | +| Underlying analysis engine crashes mid-query | FR-023 (circuit breaker), FR-024 (partial results with allow_partial flag) | +| Generated code files that change frequently | FR-008 (incremental updates), FR-012 (CAS deduplication β€” identical generated output hits cache) | +| Database connection lost during real-time updates | FR-023 (circuit breaker for storage backends), FR-024 (partial results) | +| Version drift between local and cloud deployments | FR-017 (Overlay Graph β€” Base Layer is immutable committed state; Deltas are ephemeral) | ## Requirements *(mandatory)* @@ -114,32 +120,49 @@ When a conflict is predicted, the system suggests resolution strategies based on - **Vectorize** (Cloudflare, Edge): Stores vector embeddings for semantic similarity search on the edge. **Qdrant** (optional, CLI-only): Self-hosted vector backend for local deployments. Note: ReCoco's Qdrant target is currently disabled due to a dependency conflict; Vectorize is the primary vector backend for edge deployment. 
- **FR-005**: System MUST support real-time graph queries responding within 1 second for codebases up to 100k files - **FR-006**: System MUST detect and classify concurrent code changes into a Three-Tier Conflict Taxonomy: **Tier 1 (Syntactic)** for parse/compile errors (detected via AST diff <100ms), **Tier 2 (Structural)** for valid syntax but broken linking/structure (detected via Symbol Graph <1s), and **Tier 3 (Semantic)** for valid structure but incompatible logic/behavior (detected via Dataflow/Semantic analysis <5s). Results update progressively as each tier completes. -- **FR-007**: System MUST provide conflict predictions with specific details: file locations, conflicting symbols, impact severity ratings, confidence scores, and conflict tier classification. Initial predictions (Tier 1) deliver within 100ms, refined predictions (Tier 2) within 1 second, comprehensive predictions (Tier 3) within 5 seconds. +- **FR-007**: System MUST provide conflict predictions with specific details: file locations, conflicting symbols, impact severity ratings, confidence scores, and conflict tier classification. Initial predictions (Tier 1) deliver within 100ms, refined predictions (Tier 2) within 1 second, comprehensive predictions (Tier 3) within 5 seconds. If a detection tier fails to complete (timeout or circuit breaker), the system MUST send a terminal `ConflictUpdate` with `status: Timeout, final: true` containing the last known tier result. Silence after a tier fires is NOT acceptable. - **FR-008**: System MUST support incremental updates where only changed files and affected dependencies are re-analyzed -- **FR-009**: System MUST allow pluggable analysis engines where the underlying AST parser, graph builder, or conflict detector can be swapped without rewriting application code. This abstraction MUST support diverse type systems (e.g., CodeWeaver's "Things/Connections" model) alongside standard Tree-sitter nodes. 
+- **FR-009**: System MUST allow pluggable analysis engines where the underlying AST parser, graph builder, or conflict detector can be swapped via **compile-time composition only** β€” not runtime plugin loading, not hot-swappable without recompilation. Swapping an engine requires: (1) implementing the relevant trait (e.g., `CodeAnalyzer`, `GraphBuilder`), (2) registering the implementation in the `FactoryRegistry`, (3) updating the configuration file to activate it, and (4) recompiling. Zero changes to pipeline orchestration code in `thread-flow` or `thread-services` are required. Adding a new crate dependency and implementing a trait is the expected workflow β€” this is not "rewriting application code." This abstraction MUST support diverse type systems (e.g., CodeWeaver's "Things/Connections" model) alongside standard Tree-sitter nodes. - **FR-010**: System MUST deploy to Cloudflare Workers using a **Multi-Worker Architecture** to support ~166 languages. The architecture consists of a central Router/Handler Worker that delegates to specialized Language Workers via Service Bindings. **OSS Boundary**: OSS distribution includes a simplified single-worker deployment bundling only core languages (Rust, Python, TypeScript) to minimize complexity. **Constraint**: Edge deployment MUST NOT load full graph into memory. Must use streaming/iterator access patterns and D1 Reachability Index. > **Implementation note**: The `thread-definitions` semantic classifier provides 80%+ accuracy on any tree-sitter grammar out of the box via universal rules (2,444 cross-language patterns). Full language support (~100%) requires only ~10–50 lines of TOML overrides per language. Target: all ~166 tree-sitter-language-pack languages. File-extension language identification for ~200 languages available from CodeWeaver as `data/file_extensions.json`. 
- **FR-LANGDETECT**: Language identification SHALL use a two-tier strategy: (1) hardcoded extension lookup (primary, zero-cost), (2) AST fingerprinting fallback β€” parse with candidate grammar, classify node types, score = recognized/total; grammar with highest score (threshold ~0.75) is the probable language. Enables reliable detection for extensionless files and ambiguous cases. - **FR-011**: System MUST run as a local CLI application for developer workstation use (available in OSS). **Local-Only Mode**: In this mode, Postgres serves as both the CAS store and the "Real-Time Service" (managing the Overlay/Deltas in memory), ensuring full functionality without cloud connectivity. - **FR-012**: System MUST use content-addressed caching to avoid re-analyzing identical code sections across updates -- **FR-013**: System MUST propagate code changes to all connected clients within 100ms of detection for real-time collaboration +- **FR-013**: System MUST propagate code changes to all connected clients within 100ms, measured from the moment the file system event is received by the `thread-indexer` watcher (or equivalent source event for non-filesystem sources), to the moment the first WebSocket/SSE message is sent to connected clients. This budget covers: event receipt β†’ incremental analysis β†’ graph delta computation β†’ client notification. Applies to CLI deployment; edge deployment target is 200ms p95 due to additional Containerβ†’Worker hop. - **FR-014**: System MUST track analysis provenance showing which data source, version, and timestamp each graph node originated from -- **FR-015**: System MUST support semantic search across the codebase to find similar functions, related types, and usage patterns -- **FR-016**: System MUST provide graph traversal APIs via **Connect-RPC** (gRPC-compatible over HTTP/1.1 & HTTP/2) using **Protobuf** service definitions. 
These definitions MUST be centralized in a `thread-api-proto` crate to ensure type safety across CLI (Rust), Edge (WASM), and Web (TypeScript) clients. +- **FR-015**: System MUST support semantic search across the codebase to find similar functions, related types, and usage patterns. When the vector search backend (Vectorize/Qdrant) is unavailable, FR-015 MUST degrade gracefully to AST-based search using `SemanticClass` and importance scores from `thread-definitions`. AST-based search still provides rich structural results (function definitions, type relationships, call patterns) β€” it loses only vector similarity ranking. Responses in degraded mode include `"search_mode": "ast_semantic"` to distinguish from full vector search. This degraded mode is NOT 'keyword-only' β€” it leverages the full semantic classification layer. +- **FR-016**: System MUST provide graph traversal APIs using **prost**-generated Protobuf message encoding over plain **HTTP POST** transport (`Content-Type: application/x-protobuf`). There is NO Connect-RPC or gRPC framing β€” Cloudflare Workers do not support HTTP/2 trailers required by Connect-RPC/gRPC. TypeScript clients use `buf` CLI + `@bufbuild/protobuf` (protobuf-es v2) for type-safe code generation from `.proto` definitions. Internal Rust-to-Rust communication (Workerβ†’Container service bindings, Container internal, CLI internal calls) uses **postcard** for compact binary serialization. MCP server integration (future) uses `serde_json`/JSON-RPC 2.0 as a separate transport adapter. API type definitions are centralized in the `thread-api` crate to ensure type safety across CLI (Rust), Edge (WASM), and Web (TypeScript) clients. All `.proto` files MUST use `package thread.v1;` versioning. Proto files are committed at `crates/thread-api/proto/v1/`. Field additions within a version are backward-compatible and do not require a version bump. 
Removing or renumbering fields, or changing field types incompatibly, requires a new package version (`thread.v2;`) and a corresponding new proto directory. The TypeScript client regenerates from the versioned proto directory via `buf generate`. + + **Error response format**: `Content-Type: application/x-protobuf` applies to 2xx success responses only. Non-2xx error responses use `Content-Type: application/json` with a JSON error envelope: `{"error": "<ERROR_CODE>", "message": "<human-readable message>", "details": <optional structured object>}`. This applies regardless of the request content type, ensuring error bodies are parseable by generic HTTP tooling without a proto schema. The JSON error bodies shown in FR-023 and FR-024 (`{"error": "CIRCUIT_OPEN", ...}`) follow this format. - **FR-017**: System MUST utilize an **Overlay Graph Architecture** to manage state and consistency. - **Base Layer (Immutable)**: Represents the graph at a specific Git commit, stored in D1 (Cloud) or Postgres (Local). - **Delta Layer (Ephemeral)**: Represents local uncommitted changes (dirty state), stored in memory or temporary local storage. - **Unified View**: The query engine merges Base + Delta at runtime to provide a real-time view without modifying the persistent Base storage. - **Conflict Detection**: Performed by comparing active Deltas from different users against the Base, rather than merging database states. + - **Default Behavior**: The Unified View (Base + Delta merge) is the default query behavior. Callers receive their local uncommitted changes automatically reflected in all graph query results. To query the committed Base Layer only (excluding local deltas), callers pass `include_local_delta: false` in the query request. This opt-out is useful for: comparing local changes against the committed baseline, debugging conflict predictions, or generating reports from stable committed state.
- **FR-018**: System MUST maintain graph consistency when code is added, modified, or deleted during active queries - **FR-019**: System MUST log all conflict predictions and resolutions for audit and learning purposes - **FR-020**: System MUST handle authentication and authorization for multi-user scenarios when deployed as a service, utilizing standard **OAuth2/OIDC** protocols. -- **FR-021**: System MUST expose metrics for: query performance, cache hit rates, indexing throughput, and storage utilization -- **FR-022**: System MUST utilize batched database operations (D1 Batch API) and strictly govern memory usage (<80MB active set) on Edge via ReCoco adaptive controls (limiting in-flight rows and bytes) to prevent OOM errors. Large payloads exceeding D1 limits should be offloaded to R2 or a Dead Letter Queue (DLQ) pattern. -- **FR-023**: System MUST implement a **Circuit Breaker** pattern for data sources. If a source fails >5 times in 30s, it moves to OPEN state. After 60s in OPEN state, it moves to HALF-OPEN to allow a single probe request to verify source health. +- **FR-021**: System MUST expose metrics for: query performance, cache hit rates, indexing throughput, and storage utilization. This covers the `/metrics` and `/health` HTTP endpoints. Log stream observability (structured per-operation logs across pipeline crates) is covered by FR-027. +- **FR-022**: System MUST utilize batched database operations (D1 Batch API) and strictly govern memory usage (<80MB active set) on Edge via ReCoco adaptive controls (limiting in-flight rows and bytes) to prevent OOM errors. Large payloads exceeding storage backend limits MUST be offloaded via a configurable **Large Payload Offload** strategy rather than failing the write. The offload strategy is backend-specific (e.g., R2 + Dead Letter Queue for Cloudflare edge deployment) and implemented in the deployment layer, not the OSS library. 
**Recommended thresholds** (informative, not normative for OSS): trigger offload when a single payload item exceeds 512KB or when a batch exceeds 20MB. Offloaded items MUST be processed asynchronously and retried until acknowledged, up to a maximum retry count (recommended: 5), after which items are logged and discarded. Offload queue depth MUST be observable via the metrics endpoint (FR-021). +- **FR-023**: System MUST implement a **Circuit Breaker** pattern for data sources. If a source fails >5 times in 30s, it moves to OPEN state. After 60s in OPEN state, it moves to HALF-OPEN to allow a single probe request to verify source health. Circuit breaker pattern applies to: configured Git/S3/GitHub/GitLab data sources, Postgres storage backend, D1 storage backend, and Vectorize/Qdrant vector search backends. - **FR-024**: System MUST support **Partial Graph Results**. Query APIs must accept an `allow_partial=true` flag and return a "Graph Result Envelope" containing available subgraphs, a list of missing regions, and error details, rather than failing the entire query. +**FR-023/FR-024 Interaction**: When a circuit breaker is OPEN for a required data source and an incoming query has `allow_partial=false`: + +1. **If request timeout budget allows**: Queue the request. When the circuit moves to HALF-OPEN and the probe succeeds, process the queued request. Return a `Retry-After` header indicating estimated wait time. +2. **If timeout budget is exceeded before HALF-OPEN**: Return an error response with `{"error": "CIRCUIT_OPEN", "source": "<source_id>", "retry_after_seconds": <seconds>, "partial_available": true}`. Include a hint that retrying with `allow_partial=true` would return available data immediately. + +Queued requests are bounded: maximum 100 queued requests per circuit-broken source. Beyond this limit, immediately return the error response.
+ - **FR-025**: System MUST detect and handle circular dependencies via depth-limiting and cycle detection mechanisms to prevent infinite recursion during graph traversal. +- **FR-026**: System MUST expose a health check endpoint `GET /health` returning a JSON response within 50ms: `{"status": "ok"|"degraded"|"starting", "cache_hit_rate": <float>, "lag_ms": <int>, "storage_ok": <bool>}`. `"starting"` status indicates vector index warmup in progress; core graph queries remain available. `"degraded"` indicates a storage backend circuit breaker is OPEN. This endpoint requires no authentication. + +- **FR-027**: System MUST emit structured logs in JSON format for all significant operations throughout the analysis pipeline. Minimum required fields per log entry: `timestamp` (ISO-8601), `level` (`error`/`warn`/`info`/`debug`), `component` (crate name), `operation` (pipeline stage or function name), `duration_ms` (for timed operations), and applicable entity IDs (`session_id`, `repository_id` where available). Errors MUST include `error_type` and `context` fields. + + **Deployment paths**: + - **Edge**: `workers-rs` log macros → Cloudflare Workers Logs → automated OTEL export. No manual trace ID propagation is required in the `thread-api` protocol — Cloudflare handles span correlation across the Worker→Container hop automatically. + - **CLI**: `tracing` crate with JSON subscriber (`tracing-subscriber` + `fmt` JSON format). Human-readable pretty-print format available via feature flag or environment variable for local development. **FR-CLASSIFY**: The system MUST classify all extracted AST node types into one of 22 language-agnostic `SemanticClass` categories using the `thread-definitions` classifier, enabling AI-context importance ranking.
@@ -149,6 +172,7 @@ When a conflict is predicted, the system suggests resolution strategies based on - Storage: `semantic_class` field on `GraphNode`; importance scores computed on-demand **Success criteria:** + - All `GraphNode`s have a populated `semantic_class` field - Context pack generation can rank definitions by `task_score(class, agent_task)` - New language support achievable via TOML overrides without Rust code changes @@ -169,18 +193,29 @@ When a conflict is predicted, the system suggests resolution strategies based on ## Success Criteria *(mandatory)* -### Measurable Outcomes +### Technical Success Criteria + +Measurable, automatable outcomes tied to functional requirements. - **SC-001**: Developers can query code dependencies and receive complete results in under 1 second for codebases up to 100,000 files -- **SC-002**: System detects 95% of potential merge conflicts before code is committed, with false positive rate below 10%. False Positive defined as: A predicted conflict that is manually dismissed by the user or successfully merged without modification. +- **SC-002**: System detects 95% of potential merge conflicts before code is committed, with false positive rate below 10%. False Positive defined as: A predicted conflict that is manually dismissed by the user or successfully merged without modification. 
*(Commercial scope β€” requires thread-conflict crate; deferred.)* - **SC-003**: Incremental indexing completes in under 10% of full analysis time for typical code changes (affecting <5% of files) - **SC-004**: System handles 1000 concurrent users querying simultaneously with <2 second p95 response time -- **SC-005**: Conflict resolution time reduces by 70% (from 30 minutes to under 10 minutes) when using AI-assisted suggestions - **SC-006**: Cross-repository dependency tracking works across 5+ different code sources without manual configuration -- **SC-007**: Developer satisfaction score of 4.5/5 for "confidence in making code changes" after using conflict prediction +- **SC-007-OSS**: `ReachabilityIndex` returns correct k-hop ancestor/descendant sets for 100% of test cases in `tests/benchmarks/reachability_accuracy.rs`. Test corpus: 10,000-node synthetic graph with known ground-truth reachability up to k=3 hops (FR-017, T034). *(OSS proxy for SC-002, which is deferred to commercial scope with thread-conflict.)* +- **SC-035-OSS**: WebSocket transport delivers `CodeChangeDetected` and `GraphUpdate` messages to all connected test clients within 100ms in the integration test suite for `thread-realtime` (T035). Verified against a local mock repository watcher with 50 concurrent test connections. + +### Product Goals *(tracked by product metrics, not automated tests)* + +These express desired user outcomes. They are not directly verifiable by automated tests and are tracked +via usage analytics, user surveys, and adoption metrics. They require the commercial conflict detection +features (thread-conflict) to be meaningful. 
+ +- **SC-005**: Conflict resolution time reduces by 70% (from 30 minutes to under 10 minutes) when using AI-assisted suggestions *(requires thread-conflict β€” deferred to commercial)* +- **SC-007**: Developer satisfaction score of 4.5/5 for "confidence in making code changes" after using conflict prediction *(requires thread-conflict β€” deferred to commercial)* - **SC-008**: 90% of developers successfully integrate the system into their workflow within first week of adoption -- **SC-009**: Real-time collaboration features reduce integration delays from hours to minutes (75% improvement) -- **SC-010**: System operates with 99.9% uptime when deployed to Cloudflare edge network +- **SC-009**: Real-time collaboration features reduce integration delays from hours to minutes (75% improvement) *(requires thread-conflict β€” deferred to commercial)* +- **SC-010**: System operates with 99.9% uptime when deployed to Cloudflare edge network *(SLA target β€” tracked operationally via uptime monitoring, not by automated test)* ### Service Architecture Success Criteria @@ -191,13 +226,14 @@ When a conflict is predicted, the system suggests resolution strategies based on - **SC-CACHE-001**: Content-addressed cache achieves >90% hit rate for repeated analysis of unchanged code sections - **SC-CACHE-002**: Cache invalidation occurs within 100ms of source code change detection - **SC-CACHE-003**: Cache size remains under 500MB for 10k file repository, scaling linearly with codebase size -- **SC-CACHE-004**: Cache warmup completes in under 5 minutes for new deployment with existing persistent storage +- **SC-CACHE-004**: Core AST graph analysis is available immediately on deployment β€” there is no warmup period for graph queries, dependency analysis, or semantic classification. 
Vector search (FR-015, Vectorize/Qdrant) may require index warmup; during this period, queries return results with `semantic_search_available: false` and fall back to AST-based search (see D7/FR-015 degraded mode). Cache warmup for previously-analyzed codebases (restoring from persistent storage) completes in under 5 minutes. +- **SC-HEALTH-001**: Health endpoint responds within 50ms under normal load and within 200ms during peak indexing. Returns `status: "starting"` during cold-start warmup and `status: "degraded"` when any circuit breaker is OPEN (FR-026). #### Incremental Updates - **SC-INCR-001**: Code changes trigger only affected component re-analysis, not full codebase scan - **SC-INCR-002**: Incremental update completes in <10% of full analysis time for changes affecting <5% of files -- **SC-INCR-003**: Dependency graph updates propagate to all connected clients in <100ms +- **SC-INCR-003**: Dependency graph updates propagate to all connected clients in <100ms (measured from watcher event receipt) - **SC-INCR-004**: Change detection accurately identifies affected files with 99% precision (no missed dependencies) #### Storage Performance @@ -219,19 +255,65 @@ When a conflict is predicted, the system suggests resolution strategies based on - **SC-EDGE-005**: Commercial edge workers handle 10k requests per second per geographic region without rate limiting - **SC-EDGE-006**: Commercial global edge deployment achieves <100ms p95 latency from any major city worldwide +#### Provenance Tracking + +- **SC-PROV-001**: Provenance query for any `GraphNode` returns source repository, commit ref, and ingestion timestamp within 100ms p95 (FR-014) + +#### Semantic Search + +- **SC-SEARCH-001**: Semantic similarity search achieves β‰₯70% precision and β‰₯70% recall on annotated benchmark set; top-10 results returned within 200ms p95 (FR-015) +- **SC-SEARCH-002**: When Vectorize/Qdrant is unavailable, semantic search MUST automatically degrade to AST-based search and 
return results within 200ms p95. Degraded responses include `"search_mode": "ast_semantic"`. No error is returned to the caller for this degraded mode. + +#### Engine Pluggability + +- **SC-ENGINE-001**: A new `CodeAnalyzer` implementation can be integrated by: (1) implementing the relevant trait, (2) registering in `FactoryRegistry`, (3) updating the configuration file, (4) recompiling β€” with zero modifications to `thread-flow` or `thread-services` orchestration code. Verified by integration test that adds a mock `CodeAnalyzer` implementation (FR-009). + +#### Language-Agnostic Queries + +- **SC-LANGQUERY-001**: `find_by_class(SemanticClass::DefinitionCallable)` returns semantically equivalent results across Rust (`function_item`), Python (`function_definition`), and Go (`function_declaration`) test fixtures with no language-specific query code at the call site. Verified by cross-language query integration tests in the T-C12 test suite (FR-LANGQUERY). + +#### Audit Log + +- **SC-AUDIT-001**: Conflict event log captures 100% of conflict predictions and status transitions; retained for β‰₯90 days; queryable by file, developer, and time range (FR-019) *(Commercial scope β€” deferred with thread-conflict)* + +#### Observability + +- **SC-OBS-001**: All pipeline crates (`thread-flow`, `thread-graph`, `thread-indexer`, `thread-api`, `thread-realtime`) emit structured log entries containing the required fields from FR-027 for: analysis start/completion, cache hits/misses, storage operation latency, and error conditions. Verified by integration test capturing log output and asserting schema compliance on a representative operation in each crate (FR-027). 
+ +#### Language Detection + +- **SC-LANGDETECT-001**: Extensionless file language detection achieves ≥95% accuracy on a benchmark set of 500 representative files across major languages; false-language-assignment rate below 2% (FR-LANGDETECT) + +#### Authentication + +- **SC-AUTH-001**: CLI local-mode deployment MUST NOT require authentication (single-user, local network only). Multi-user service-mode deployment MUST authenticate via OAuth2 PKCE flow, support at minimum GitHub and Google as OIDC providers, issue tokens with configurable expiry (default 24 hours), and invalidate sessions on explicit logout. No unauthenticated requests accepted by the service-mode HTTP endpoints. + +### FR Coverage Gaps *(documented for tracking)* + +The following functional requirements previously had no associated success criterion. +SCs have now been added for each (see above); refine their measurable targets during implementation planning as needed: + +- **FR-009** (pluggable engines) — SC-ENGINE-001 added above. +- **FR-020** (OAuth2/OIDC authentication): SC-AUTH-001 added above. +- **FR-LANGQUERY** (language-agnostic semantic queries via `SemanticClass`) — SC-LANGQUERY-001 added above. + +Previously uncovered; SCs added in this review: FR-014 → SC-PROV-001, FR-015 → SC-SEARCH-001, +FR-019 → SC-AUDIT-001 *(deferred)*, FR-LANGDETECT → SC-LANGDETECT-001, FR-020 → SC-AUTH-001, +FR-009 → SC-ENGINE-001, FR-LANGQUERY → SC-LANGQUERY-001, FR-027 → SC-OBS-001. + ## Assumptions 1. **Primary Languages**: Initial support focuses on Rust, TypeScript/JavaScript, Python, Go (Tier 1 languages from CLAUDE.md) 2. **Data Source Priority**: Git-based repositories are primary data source, with local file system and cloud storage as secondary 3. **Conflict Types**: Focus on code merge conflicts, API breaking changes, and concurrent edit detection - not runtime conflicts or logic bugs 4.
**Authentication**: Multi-user deployments use standard OAuth2/OIDC for authentication, delegating to existing identity providers -5. **Real-Time Protocol**: Custom RPC over HTTP streaming for real-time updates (unified with query API), with WebSocket/SSE as fallback options. RPC server-side streaming provides efficient real-time propagation for both CLI and edge deployments. Cloudflare Durable Objects expected for edge stateful operations (connection management, session state). Polling fallback for restrictive networks. +5. **API & Real-Time Protocol**: Query API uses prost Protobuf over plain HTTP POST (no Connect-RPC/gRPC framing). Real-time update propagation uses WebSocket (CLI, full-duplex) and SSE (edge, server-push) transports via the `thread-realtime` crate. Cloudflare Durable Objects required for edge stateful operations (connection management, session state) β€” implemented in commercial crate. Polling fallback for restrictive networks. 6. **Graph Granularity**: Multi-level graph representation (file -> class/module -> function/method -> symbol) for flexibility 7. **Conflict Detection Strategy**: Multi-tier progressive approach using all available detection methods (AST diff, semantic analysis, graph impact analysis) with intelligent routing. Fast methods provide immediate feedback, slower methods refine accuracy. Results update in real-time as better information becomes available, balancing speed with precision. 8. **Conflict Resolution**: System provides predictions and suggestions only - final resolution decisions remain with developers 9. **Performance Baseline**: "Real-time" defined as <1 second query response for typical developer workflow interactions 10. **Scalability Target**: Initial target is codebases up to 500k files, 10M nodes - can scale higher with infrastructure investment -11. **Engine Architecture**: Engines are swappable via well-defined interfaces, not runtime plugin loading (compile-time composition) +11. 
**Engine Pluggability**: Engines are swappable via compile-time composition only β€” not runtime plugin loading and not hot-swappable. The swap contract is: implement the relevant trait + register in `FactoryRegistry` + update config file + recompile. No orchestration code changes required. This is the intended workflow, not a limitation. 12. **Storage Strategy**: Multi-backend architecture with specialized purposes: Postgres (CLI primary, full ACID graph), D1 (edge primary, distributed graph), Vectorize (edge vector search), Qdrant (CLI-only vector search, optional). Content-addressed storage via ReCoco dataflow framework (per Constitution v2.0.0, Principle IV). ReCoco integration follows trait boundary pattern: Thread defines storage and dataflow interfaces, ReCoco provides implementations. This allows swapping ReCoco components or vendoring parts as needed. 13. **Deployment Model**: Single binary for both CLI and WASM with conditional compilation, not separate codebases. **Commercial Boundaries**: OSS includes core library with simple/limited WASM worker (Rust, Python, TypeScript). Full cloud deployment (comprehensive edge, managed service, advanced features) is commercial/paid. Architecture enables feature-flag-driven separation. 14. **Vendoring Strategy**: ReCoco components may be vendored (copied into Thread codebase) if cloud deployment requires customization or upstream changes conflict with Thread's stability requirements. Trait boundaries enable selective vendoring without architectural disruption. (Note: less critical now that ReCoco is Thread's own fork.) @@ -241,6 +323,8 @@ When a conflict is predicted, the system suggests resolution strategies based on - **Sync Strategy**: "Sync" is simply uploading/downloading immutable CAS chunks. No row-level merge logic required. - **Local-Only**: Postgres acts as the standalone CAS and State manager. - **Distributed**: D1 acts as the shared CAS; Real-Time Service manages ephemeral Deltas. +17. 
**Cross-Repository Consistency Model**: Cross-repository dependency links between separately indexed repositories use eventual consistency. During concurrent indexing of multiple repositories, cross-repo links may be briefly stale (pointing to an older version of the linked symbol) until the next incremental update cycle completes. Queries against stale cross-repo links return results with `cross_repo_stale: true` in the response envelope. This is acceptable β€” cross-repo links are updated opportunistically, not transactionally. *Informative target (not normative for OSS): cross-repo links are refreshed within 5 minutes of an incremental update cycle completing for the source repository.* +18. **Observability Model**: Structured logging (FR-027) is the primary observability mechanism. Edge deployment relies on Cloudflare Workers Logs with automated OTEL export β€” no manual distributed trace header propagation is required in the `thread-api` protocol. CLI deployment uses the `tracing` crate ecosystem. Metrics endpoints (FR-021) complement log-based observability but are not a substitute for it. ## Dependencies @@ -255,7 +339,7 @@ When a conflict is predicted, the system suggests resolution strategies based on 6. **Tree-sitter**: Underlying parser infrastructure for AST generation across multiple languages 7. **Concurrency Models**: Rayon for CLI parallelism, tokio for edge async I/O 8. **WASM Toolchain**: `xtask` build system for WASM compilation to Cloudflare Workers target -9. **Connect-RPC Framework**: Primary API protocol dependency (`connect-rs` or similar for Rust). Provides unified interface for queries and real-time updates across CLI and edge deployments with type safety. Must compile to WASM for Cloudflare Workers deployment. +9. **API Protocol**: `prost` runtime for Protobuf message encoding/decoding (no_std compatible, compiles to `wasm32-unknown-unknown`); `prost-build` as host-only code generation tool (never in WASM binary). 
TypeScript client codegen via `buf` CLI + `@bufbuild/protobuf` (protobuf-es v2). Transport: plain HTTP POST β€” no Connect-RPC or gRPC framing (Workers lack HTTP/2 trailer support). Internal Rust-to-Rust communication uses `postcard` (already in workspace). MCP server integration (future): `serde_json`/JSON-RPC 2.0 as separate transport adapter; prost types can optionally derive serde for MCP JSON output. 10. **Network Protocol**: Cloudflare Durable Objects required for edge stateful operations (connection management, session persistence, collaborative state). HTTP REST fallback if RPC proves infeasible. 11. **CodeWeaver Integration** (Optional): CodeWeaver's semantic characterization layer (sister project, currently Python) provides sophisticated code analysis capabilities. May port to Rust if superior to ast-grep-derived components. Evaluation pending ReCoco capability assessment. 12. **Graph Database**: Requires graph query capabilities - may need additional graph storage layer beyond relational DBs @@ -268,8 +352,9 @@ Thread follows a strict one-directional dependency rule: **commercial/private cr ### Component Classification | Component | Classification | Notes | -|-----------|---------------|-------| -| `thread-graph`, `thread-indexer`, `thread-conflict` | OSS | Core graph intelligence, no deployment dependency | +| ----------- | --------------- | ------- | +| `thread-graph`, `thread-indexer` | OSS | Core graph intelligence crates, no deployment dependency | +| `thread-conflict` | **Commercial/TBD** | Conflict detection is a proprietary differentiator; deferred to dedicated commercial design phase. Phase 4 tasks (T029–T038) in tasks.md are out of OSS scope. 
| | `thread-definitions` | OSS | Pure classification library, zero cloud dependency | | `thread-storage` (Postgres + D1 + Vectorize backends) | OSS | All three follow the D1 model β€” library backends, user-provided credentials | | `thread-api` (RPC types, Protobuf definitions) | OSS | Protocol definitions required for CLI and third-party clients | @@ -281,6 +366,18 @@ Thread follows a strict one-directional dependency rule: **commercial/private cr | Wrangler configurations, Worker entry points | **Private** | Deployment machinery in private repo | | R2 offload, Workers AI integrations | **Private** | Cloudflare proprietary services; commercial crate only | +### OSS β†’ Commercial Upgrade Path + +The OSS β†’ commercial upgrade is zero-re-index. The D1 schema is forward-compatible: + +1. Deploy the commercial Worker pointing at the existing OSS D1 database β€” no schema migration needed. +2. Add Durable Objects binding for `thread-realtime` (`RealtimeBackend` implementation). +3. Add Vectorize index binding (if upgrading from OSS without vector search). +4. Swap the OSS single Worker binary for the commercial Router Worker. +5. WebSocket connections are interrupted during the Worker swap (standard Cloudflare deployment behavior) but reconnect automatically. + +**No re-indexing is required.** All previously analyzed graph data in D1 is immediately available to the commercial Worker. The OSS D1 schema is a strict subset of the commercial schema β€” commercial Wrangler migrations are additive only. + ### Task Annotations Tasks in tasks.md that produce artifacts destined for the private commercial crate are annotated `[CF: private]`. Tasks that touch the boundary (OSS component + private integration) are annotated `[CF: boundary]`. 
@@ -315,7 +412,7 @@ None - all critical items have been addressed with reasonable defaults documente - Consider phased rollout: P1 (graph queries) -> P2 (conflict prediction) -> P3 (multi-source) -> P4 (AI resolution) to validate core value proposition early - **Commercial Architecture**: OSS/commercial boundaries must be designed from day one. OSS provides core library value (CLI + basic edge), commercial provides managed cloud service with advanced features. Architecture uses feature flags and conditional compilation to enable clean separation while maintaining single codebase. - **Component Evaluation Strategy**: Do NOT assume existing Thread components will be reused. First evaluate ReCoco capabilities comprehensively, then identify gaps, then decide on AST/semantic analysis components. CodeWeaver's semantic layer is a viable alternative to Thread's ast-grep-derived components. -- **MCP Server**: Implementation details and specification TBD. The AI knowledge layer will expose an MCP-compatible interface; the specific tool design, tier structure, and OSS/commercial scope require dedicated design work. +- **MCP Server**: Will be implemented as a separate `thread-mcp` crate using the `rmcp-actix-web` crate. Tool design, tier structure, and OSS/commercial scope require dedicated design work. **Structural requirement**: All types crossing the `thread-api` → MCP boundary must derive `serde::Serialize`/`serde::Deserialize`. `prost`-generated types can satisfy this via serde feature flags or manual derives — verify compatibility before T017 finalizes proto definitions. MCP will NOT be hand-rolled; `thread-mcp` depends on `thread-api` but `thread-api` has no dependency on `thread-mcp`.
## Implementation Status @@ -326,7 +423,7 @@ None - all critical items have been addressed with reasonable defaults documente - **Content-Addressed Storage** (FR-004, FR-012): Blake3 fingerprinting via ReCoco, StorageBackend trait with Postgres and D1 backends - **Incremental Updates** (FR-008): IncrementalAnalyzer, DependencyGraph with BFS invalidation and topological sort - **Language Extractors**: Rust, TypeScript, Python, Go dependency extraction -- **ReCoco Integration**: Bridge pattern (bridge.rs), ThreadFlowBuilder DSL, CocoIndex operators for parse/symbols/imports/calls +- **ReCoco Integration (Scaffolded ⚠️)**: bridge.rs contains stub implementations only β€” all methods return empty results with TODO comments. ThreadFlowBuilder DSL and CocoIndex operator structure are in place. bridge.rs must be fully implemented before T-C10 (classify operator integration). - **CLI Deployment** (FR-011): Postgres backend fully operational - **Edge Storage** (FR-010 partial): D1 backend implemented @@ -337,7 +434,7 @@ None - all critical items have been addressed with reasonable defaults documente - **Real-Time Queries** (FR-005): No query API layer yet - **Conflict Detection** (FR-006, FR-007): Three-tier conflict detection system not started - **Multi-Source Indexing** (FR-003): Git, S3 sources not implemented -- **Connect-RPC API** (FR-016): No RPC layer +- **Graph API** (FR-016): No API layer yet β€” prost-generated Protobuf types and HTTP POST handlers not implemented - **Overlay Graph** (FR-017): Not implemented - **Semantic/Vector Search** (FR-015): Vectorize integration pending; Qdrant blocked by dependency conflict diff --git a/specs/001-realtime-code-graph/tasks.md b/specs/001-realtime-code-graph/tasks.md index f6f2f32..53331e4 100644 --- a/specs/001-realtime-code-graph/tasks.md +++ b/specs/001-realtime-code-graph/tasks.md @@ -38,12 +38,20 @@ See `crates/flow/` for the existing foundation. 
- `Classification` struct with class, rank, confidence, method fields - All serde derives with rename_all = "snake_case" +- [ ] **T-C08a**: Write failing classification accuracy test suite *(TDD: write tests before implementation)* + - Load all 27 language JSON fixture files from `tests/fixtures/` + - Assert β‰₯99% accuracy across all classified items β€” these tests MUST FAIL until T-C06 is complete + - Assert β‰₯80% accuracy on simulated "new language" (universal rules only) + - Add snapshot test stubs for representative node types (using insta) β€” stubs only, values TBD + - Property test stubs: every SemanticClass has a rank, every class has scores β€” stubs only + - **Gate**: Do NOT implement classifier (T-C06) until these tests are written and confirmed failing + - [ ] **T-C03**: Implement `error.rs` - `ClassifierError` enum with thiserror - Variants: DataLoadFailed, InvalidJson, InvalidToml, UnknownSemanticClass - [ ] **T-C04**: Implement `rules.rs` - - Deserialize `universal_rules.json` β†’ `HashMap, SemanticClass>` (exact) + majority maps + - Deserialize `universal_rules.json` β†’ `thread_utils::RapidMap, SemanticClass>` (exact) + majority maps - Deserialize `categories.json` β†’ category map - Deserialize TOML overrides per SupportLang language - String-keyed fallback map for non-SupportLang languages (best-effort coverage) @@ -90,12 +98,11 @@ See `crates/flow/` for the existing foundation. - **Note**: All source data already in repo β€” this is a move/transform operation. file_extensions.py has ~200 code languages, ~50 excluded dirs, ~60 excluded extensions. 
-- [ ] **T-C08**: Implement classification accuracy tests - - Port holdout evaluation: load all 27 language JSON files from tests/fixtures/ - - Assert β‰₯99% accuracy across all classified items - - Assert β‰₯80% accuracy on simulated "new language" (universal rules only, no overrides) - - Snapshot tests for representative node types per language (using insta) - - Property tests: every SemanticClass has a rank, every class has scores +- [ ] **T-C08b**: Verify and complete classification accuracy tests *(TDD: make failing tests pass)* + - Confirm all T-C08a tests now pass with classifier implementation from T-C06 + - Fill in snapshot test expected values (insta review) + - Confirm property tests pass + - Add any edge cases discovered during T-C06 implementation - [ ] **T-C09**: Update `GraphNode` to use `semantic_class` + `node_kind` - Remove `node_type: NodeType` enum field @@ -104,7 +111,11 @@ See `crates/flow/` for the existing foundation. - Update T011 implementation to populate both fields - Update any existing code referencing node_type +- [ ] **T-C09b**: Harden bridge.rs stubs β€” replace all `// TODO` stub returns with explicit `Err(ThreadFlowError::NotImplemented { method: "method_name" })`. Add `NotImplemented` variant to `ThreadFlowError`. Commit independently so stub behavior is auditable before classify operator work begins. + - **Constraint**: Zero silent empty returns permitted after this task. All stub calls must surface as errors in logs/tests. + - [ ] **T-C10**: Add `classify_node_types` operator to thread-flow pipeline + - ⚠️ **PREREQUISITE**: Before modifying bridge.rs, all existing stub methods MUST be updated to return `Err(ThreadFlowError::NotImplemented { method: "" })` instead of silent empty results. Silent empty stubs mask missing data as successful empty queries. This change MUST be committed and reviewed before any T-C10 work begins. (See T-C09b.) 
- New ReCoco operator: takes parsed AST nodes β†’ emits classification metadata - Call `classifier.classify(node.kind(), lang, kind, purpose, is_root, categories)` - Positioned between parse and extract_symbols steps in ThreadFlowBuilder @@ -130,7 +141,8 @@ See `crates/flow/` for the existing foundation. - [ ] T001 Create `crates/thread-graph` β€” extend/wrap `crates/flow/src/incremental/graph.rs` (do NOT reimplement graph algorithms; expose as thread-graph public API) - [ ] T002 Create `crates/thread-indexer` β€” build on `crates/flow/src/incremental/analyzer.rs` and `extractors/` (do NOT reimplement change detection or language extraction) -- [ ] T003 Create `crates/thread-conflict` with `lib.rs` and `Cargo.toml` +- [ ] T003 `[CF: boundary]` Create `crates/thread-conflict` stub crate with `lib.rs` and `Cargo.toml` β€” **OSS stub only**: crate exists to satisfy dependency declarations but contains no detection logic. All implementation is Commercial/Deferred (see Phase 4 notes). Detection logic (T029–T038) is not part of this task and must not be implemented here. + > The shared types (`ConflictPrediction`, `ConflictType`, `Severity`, `DetectionTier`, `ConflictStatus`) are defined in `thread-api/src/types.rs`, not in `thread-conflict`. T003 creates an empty stub crate only β€” no type definitions needed here. - [ ] T004 Create `crates/thread-storage` β€” re-export and extend `crates/flow/src/incremental/storage.rs` (StorageBackend trait and backends already implemented in thread-flow) - [ ] T005 Create `crates/thread-api` with `lib.rs` and `Cargo.toml` - [ ] T006 Create `crates/thread-realtime` with `lib.rs` and `Cargo.toml` @@ -144,12 +156,27 @@ See `crates/flow/` for the existing foundation. - [ ] T011 Implement rich `GraphNode` and `GraphEdge` structs with semantic metadata in `crates/thread-graph/` β€” NOTE: thread-flow has minimal `DependencyEdge`; the full semantic model (NodeType, SemanticMetadata, EdgeType enum with Calls/Inherits/etc.) 
still needs to be built - ⚠️ DEPENDS ON T-C09: GraphNode now uses `semantic_class: SemanticClass` + `node_kind: Option>` instead of `node_type: NodeType` enum. Complete T-C01 through T-C09 before implementing T011. +- [ ] **T011a**: Implement NodeId determinism property tests in `crates/thread-graph/tests/node_id_tests.rs` + - Property: `NodeId::from_content(x) == NodeId::from_content(x)` always holds (same input = same output) + - Property: `NodeId::from_content(x) != NodeId::from_content(y)` for all x β‰  y in test corpus + - Fuzz test: normalized signature whitespace variations produce identical NodeId + - Test: any change to normalization logic in `NodeId::from_content` fails at least one test + - **Required before T012** β€” graph invariants depend on NodeId stability - [ ] T012 Implement `Graph` container in `crates/thread-graph/` as a semantic graph layer β€” NOTE: `DependencyGraph` for incremental analysis already exists in thread-flow; this is the richer symbol-level graph +- [ ] **T012a**: Document `thread-graph` public API surface in `specs/001-realtime-code-graph/contracts/thread-graph-api.md` + - Record all public traits, structs, and functions exposed by `crates/thread-graph/` + - Include: `Graph`, `GraphNode`, `GraphEdge`, `OverlayView` trait, traversal functions, `NodeId`/`EdgeId` constructors + - This document becomes the ground truth for contract tests (T027) + - **Gate**: T027 (graph_storage integration test) MUST NOT be written until T012a exists + - **Timing**: Written after T011/T012 TDD cycle stabilizes the API, before T027 begins - [x] T013 ~~Implement `CasStorage` trait~~ β€” DONE: `StorageBackend` trait (async, full CRUD) implemented in `crates/flow/src/incremental/storage.rs`. Thread-storage crate should re-export this. - [x] T014 ~~Implement `PostgresCas`~~ β€” DONE: `PostgresIncrementalBackend` implemented in `crates/flow/src/incremental/backends/postgres.rs`. Feature-gated behind `postgres-backend`. 
- [x] T015 ~~Implement `D1Cas`~~ β€” DONE: `D1IncrementalBackend` implemented in `crates/flow/src/incremental/backends/d1.rs`. HTTP REST client for Cloudflare D1 API. + > ⚠️ **Deprecation intent**: `D1IncrementalBackend` uses the D1 REST API and cannot meet the SC-STORE-001 <50ms p95 latency target due to the extra HTTP hop. It is retained for CI/CD tooling and external tooling use only. Production edge deployment MUST use `D1NativeBackend` (T016b, native `worker::D1Database` binding). `D1IncrementalBackend` will be deprecated once `D1NativeBackend` passes integration tests and benchmarks meet SC-STORE-001. - [ ] T016 Implement `VectorizeStorage` for Cloudflare Vectorize API in `crates/thread-storage/src/vectorize.rs` β€” REPLACES QdrantStorage for edge deployment. NOTE: `recoco/target-qdrant` is currently disabled due to a CRC dependency conflict. Qdrant support (CLI-only) can be added later when the conflict is resolved. `[CF: OSS β€” follows D1 model; user-provided Cloudflare credentials]` -- [ ] T017 Define Connect-RPC Protobuf definitions (.proto) in `crates/thread-api/proto/` and configure generation +- [ ] **T016b**: Implement `D1NativeBackend` using `worker::D1Database` native binding in `crates/thread-storage/src/d1_native.rs` β€” required to meet SC-STORE-001 <50ms p95 target. Both `D1IncrementalBackend` and `D1NativeBackend` implement `StorageBackend` trait. `[CF: OSS]` + - **Test strategy**: Use `miniflare` (local Cloudflare Workers runtime emulator) for unit and integration tests. CI integration tests require a Cloudflare staging environment via `wrangler dev`. Do not attempt to test `worker::D1Database` native bindings outside a Worker runtime context β€” they will not compile for the native target. +- [ ] T017 Define Protobuf message types (.proto) in `crates/thread-api/proto/v1/` and configure `prost` code generation. All `.proto` files use `package thread.v1;` namespace. 
Add `buf.yaml` and `buf.gen.yaml` for TypeScript client codegen targeting `@bufbuild/protobuf` (protobuf-es v2). Transport: plain HTTP POST β€” no Connect-RPC/gRPC framing. Document version bump policy in `crates/thread-api/proto/README.md`: field additions are backward-compatible; removing/renumbering fields or changing field types incompatibly requires a new `v2/` directory. Define conflict protocol types (`ConflictPrediction`, `ConflictType`, `Severity`, `DetectionTier`, `ConflictStatus`, `ResolutionStrategy`) in `crates/thread-api/src/types.rs` as Rust structs. These are shared between OSS `thread-api` and commercial `thread-conflict`. - [x] T018 ~~Implement CocoIndex dataflow traits~~ β€” DONE: ReCoco integration implemented via bridge pattern: - `crates/flow/src/bridge.rs`: `CocoIndexAnalyzer` adapter - `crates/flow/src/flows/builder.rs`: `ThreadFlowBuilder` DSL @@ -167,27 +194,42 @@ See `crates/flow/` for the existing foundation. - [ ] T023 [US1] Implement `OverlayGraph` struct (merging Base + Delta) in `crates/thread-graph/src/overlay.rs` - [ ] T024 [P] [US1] Implement `D1GraphIterator` for streaming access in `crates/thread-storage/src/d1.rs` - [ ] T025 [US1] Expose graph traversal API in `crates/thread-graph/src/traversal.rs` β€” NOTE: BFS/topological sort/cycle detection already implemented in `crates/flow/src/incremental/graph.rs`; this wraps it in thread-graph's public API -- [ ] T026 [US1] Implement Connect-RPC query handlers in `crates/thread-api/src/connect_rpc.rs` +- [ ] T026 [US1] Implement HTTP POST query handlers in `crates/thread-api/src/handlers.rs` using prost Protobuf encoding (plain HTTP POST, no Connect-RPC/gRPC framing) - [ ] T026a [US1] Implement Circuit Breaker logic for data sources in `crates/thread-indexer/src/circuit_breaker.rs` - [ ] T026b [US1] Implement Partial Graph Result Envelope in `crates/thread-api/src/response.rs` - [ ] T027 [US1] Create integration test `tests/integration/graph_storage.rs` verifying graph persistence - [ 
] T028 [US1] Expose graph query API in `crates/thread-wasm/src/api_bindings.rs` -## Phase 4: User Story 2 - Conflict Prediction (P2) +## Phase 4: User Story 2 - Conflict Prediction (P2) β€” ⚠️ DEFERRED: Commercial Scope + +> **Note**: Conflict detection is a commercial differentiator. Tasks T029–T038 are deferred to a +> separate commercial design phase and will NOT be implemented in the OSS repository. The +> `thread-conflict` crate is classified as Commercial/TBD in spec.md. Tasks are preserved here +> for planning context and future commercial design input. +> +> The `ReachabilityIndex` infrastructure (T034) is OSS β€” the index lives in `thread-storage`. +> However, the conflict detection logic that consumes it (T029–T033, T036–T038) is commercial. + **Goal**: Detect merge conflicts before commit using multi-tier analysis. **Independent Test**: Simulate concurrent changes to related files and verify conflict alert. -- [ ] T029 [P] [US2] Create benchmark `tests/benchmarks/conflict_detection.rs` -- [ ] T030 [US2] Implement `ConflictPrediction` struct in `crates/thread-conflict/src/types.rs` -- [ ] T030a [US2] Implement `Delta` struct (representing local changes) in `crates/thread-graph/src/delta.rs` -- [ ] T031 [US2] Implement Tier 1 AST diff detection in `crates/thread-conflict/src/tier1_ast.rs` -- [ ] T032 [US2] Implement Tier 2 Structural analysis in `crates/thread-conflict/src/tier2_structural.rs` -- [ ] T033 [US2] Implement Tier 3 Semantic analysis in `crates/thread-conflict/src/tier3_semantic.rs` -- [ ] T034 [US2] Implement `ReachabilityIndex` logic for D1 in `crates/thread-storage/src/d1_reachability.rs` -- [ ] T035 [US2] Implement WebSocket/SSE notification logic in `crates/thread-realtime/src/websocket.rs` -- [ ] T036 [US2] Implement `ProgressiveConflictDetector` in `crates/thread-conflict/src/progressive.rs` -- [ ] T037 [US2] Create integration test `tests/integration/realtime_conflict.rs` -- [ ] T038 [US2] Expose conflict detection API in 
`crates/thread-wasm/src/realtime_bindings.rs` +- [ ] T029 [P] [US2] [DEFERRED: commercial scope, separate design required] Create benchmark `tests/benchmarks/conflict_detection.rs` +- [ ] T030 [US2] [DEFERRED: commercial scope, separate design required] Implement `ConflictPrediction` struct in `crates/thread-conflict/src/types.rs` +- [ ] T030a [US2] [DEFERRED: commercial scope, separate design required] Implement `Delta` struct (representing local changes) in `crates/thread-graph/src/delta.rs` +- [ ] T031 [US2] [DEFERRED: commercial scope, separate design required] Implement Tier 1 AST diff detection in `crates/thread-conflict/src/tier1_ast.rs` +- [ ] T032 [US2] [DEFERRED: commercial scope, separate design required] Implement Tier 2 Structural analysis in `crates/thread-conflict/src/tier2_structural.rs` +- [ ] T033 [US2] [DEFERRED: commercial scope, separate design required] Implement Tier 3 Semantic analysis in `crates/thread-conflict/src/tier3_semantic.rs` +- [ ] T034 [US2] Implement `ReachabilityIndex` infrastructure in `crates/thread-storage/src/d1_reachability.rs` β€” OSS; k-hop bounded (k=3 default); tracks committed baseline. Conflict detection logic is commercial but the index itself is OSS. 
+- [ ] T035 [US2] Implement WebSocket/SSE notification logic in `crates/thread-realtime/src/websocket.rs` β€” OSS transport layer +- [ ] **T035b** `[BACKLOG]` Implement in-memory replay buffer for `thread-realtime` CLI WebSocket server + - Configurable retention window (default: 5 minutes, max: 30 minutes) + - Bounded by message count (max 500 messages) and memory (max 10MB) + - Activated by `RequestMissedUpdates` client message + - **Not blocked**: T035 (core WebSocket transport) proceeds without this + - **Commercial equivalent**: Durable Objects replay (T-CF01 scope) +- [ ] T036 [US2] [DEFERRED: commercial scope, separate design required] Implement `ProgressiveConflictDetector` in `crates/thread-conflict/src/progressive.rs` +- [ ] T037 [US2] [DEFERRED: commercial scope, separate design required] Create integration test `tests/integration/realtime_conflict.rs` +- [ ] T038 [US2] [DEFERRED: commercial scope, separate design required] Expose conflict detection API in `crates/thread-wasm/src/realtime_bindings.rs` ## Phase 5: User Story 3 - Multi-Source Code Intelligence (P3) **Goal**: Unified graph across multiple repositories and sources. @@ -197,18 +239,23 @@ See `crates/flow/` for the existing foundation. - [ ] T040 [US3] Implement `LocalSource` in `crates/thread-indexer/src/sources/local.rs` - [ ] T041 [P] [US3] Implement `S3Source` in `crates/thread-indexer/src/sources/s3.rs` - [ ] T042 [US3] Implement cross-repository dependency linking in `crates/thread-graph/src/linking.rs` + > **Consistency**: Eventual consistency model. Cross-repo links are computed after each incremental update cycle. Stale links are flagged with `cross_repo_stale: true` in query responses. No distributed transaction or snapshot isolation required. 
- [ ] T043 [US3] Update `ThreadBuildGraphFunction` to handle multiple sources - [ ] T044 [US3] Create integration test `tests/integration/multi_source.rs` -## Phase 6: User Story 4 - AI-Assisted Conflict Resolution (P4) +## Phase 6: User Story 4 - AI-Assisted Conflict Resolution (P4) β€” ⚠️ DEFERRED: Commercial Scope + +> **Note**: AI-assisted conflict resolution depends on conflict detection (Phase 4, also deferred). +> Tasks T045–T049 are deferred to the commercial design phase along with thread-conflict. + **Goal**: Suggest resolution strategies for detected conflicts. **Independent Test**: Create conflict and verify resolution suggestion output. -- [ ] T045 [US4] Implement `ResolutionStrategy` types in `crates/thread-conflict/src/resolution.rs` -- [ ] T046 [US4] Implement heuristic-based resolution suggestions in `crates/thread-conflict/src/heuristics.rs` -- [ ] T047 [US4] Implement semantic compatibility checks in `crates/thread-conflict/src/compatibility.rs` -- [ ] T048 [US4] Update `ConflictPrediction` to include resolution strategies -- [ ] T049 [US4] Add resolution tests in `crates/thread-conflict/tests/resolution_tests.rs` +- [ ] T045 [US4] [DEFERRED: commercial scope, requires thread-conflict] Implement `ResolutionStrategy` types in `crates/thread-conflict/src/resolution.rs` +- [ ] T046 [US4] [DEFERRED: commercial scope, requires thread-conflict] Implement heuristic-based resolution suggestions in `crates/thread-conflict/src/heuristics.rs` +- [ ] T047 [US4] [DEFERRED: commercial scope, requires thread-conflict] Implement semantic compatibility checks in `crates/thread-conflict/src/compatibility.rs` +- [ ] T048 [US4] [DEFERRED: commercial scope, requires thread-conflict] Update `ConflictPrediction` to include resolution strategies +- [ ] T049 [US4] [DEFERRED: commercial scope, requires thread-conflict] Add resolution tests in `crates/thread-conflict/tests/resolution_tests.rs` ## Phase 7: Polish & Cross-Cutting **Goal**: Performance tuning, documentation, 
and final verification.